Index: include/clang/Basic/BuiltinsX86.def
===================================================================
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -1794,6 +1794,209 @@
 TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4d*V4dUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4f*V4fUc","","avx512vl")
 TARGET_BUILTIN(__builtin_ia32_loadups256_mask, "V8fV8f*V8fUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dbpsadbw512_mask, "V32sV64cV64cIiV32sUi","","avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_mask, "V32sV8sV32sUi","","avx512bw")
+TARGET_BUILTIN(__builtin_ia32_permvarhi512_mask, "V32sV32sV32sV32sUi","","avx512bw")
+TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_512_mask, "V16fV4fV16fUs","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_broadcastf32x8_512_mask, "V16fV8fV16fUs","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_broadcastf64x2_512_mask, "V8dV2dV8dUc","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_512_mask, "V16iV4iV16iUs","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_broadcasti32x8_512_mask, "V16iV8iV16iUs","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_broadcasti64x2_512_mask, "V8LLiV2LLiV8LLiUc","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2LLiV8LLiIiV2LLiUc","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_insertf64x2_512_mask, "V8dV8dV2dIiV8dUc","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_inserti32x8_mask, "V16iV16iV8iIiV16iUs","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_inserti64x2_512_mask, "V8LLiV8LLiV2LLiIiV8LLiUc","","avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dvC*V8dUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8LLivC*V8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fvC*V16fUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16ivC*V16iUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4LLiV8LLiIiV4LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_getexppd512_mask, "V8dV8dV8dUcIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_getexpps512_mask, "V16fV16fV16fUsIi","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_insertf64x4_mask, "V8dV8dV4dIiV8dUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_inserti64x4_mask, "V8LLiV8LLiV4LLiIiV8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_movshdup512_mask, "V16fV16fV16fUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_movsldup512_mask, "V16fV16fV16fUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_permdf512_mask, "V8dV8dUcV8dUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_permdi512_mask, "V8LLiV8LLiUcV8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvardf512_mask, "V8dV8dV8LLiV8dUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvardi512_mask, "V8LLiV8LLiV8LLiV8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvarsf512_mask, "V16fV16fV16iV16fUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvarsi512_mask, "V16iV16iV16iV16iUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8LLiV8iUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsdw512_mask, "V16sV16iV16sUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsdw512mem_mask, "vV16s*V16iUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8LLiV16cUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb512mem_mask, "vV16c*V8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd512_mask, "V8iV8LLiV8iUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd512mem_mask, "vV8i*V8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw512_mask, "V8sV8LLiV8sUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusdb512_mask, "V16cV16iV16cUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusdb512mem_mask, "vV16c*V16iUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusdw512_mask, "V16sV16iV16sUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusdw512mem_mask, "vV16s*V16iUs","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8LLiV16cUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb512mem_mask, "vV16c*V8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd512_mask, "V8iV8LLiV8iUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd512mem_mask, "vV8i*V8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw512_mask, "V8sV8LLiV8sUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8LLiUc","","avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvarqi512_mask, "V64cV64cV64cV64cULLi","","avx512vbmi")
+TARGET_BUILTIN(__builtin_ia32_permvarqi128_mask, "V16cV16cV16cV16cUs","","avx512vbmi,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvarqi256_mask, "V32cV32cV32cV32cUi","","avx512vbmi,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dbpsadbw128_mask, "V8sV16cV16cIiV8sUc","","avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dbpsadbw256_mask, "V16sV32cV32cIiV16sUs","","avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pbroadcastb128_mask, "V16cV16cV16cUs","","avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pbroadcastb256_mask, "V32cV16cV32cUi","","avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_mask, "V8sV8sV8sUc","","avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_mask, "V16sV8sV16sUs","","avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvarhi128_mask, "V8sV8sV8sV8sUc","","avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvarhi256_mask, "V16sV16sV16sV16sUs","","avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_broadcastf32x2_256_mask, "V8fV4fV8fUc","","avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_broadcastf64x2_256_mask, "V4dV2dV4dUc","","avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_128_mask, "V4iV4iV4iUc","","avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_broadcasti32x2_256_mask, "V8iV4iV8iUc","","avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_broadcasti64x2_256_mask, "V4LLiV2LLiV4LLiUc","","avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc","","avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2LLiV4LLiIiV2LLiUc","","avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc","","avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc","","avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc","","avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fpclassps256_mask, "UcV8fIiUc","","avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_insertf64x2_256_mask, "V4dV4dV2dIiV4dUc","","avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_inserti64x2_256_mask, "V4LLiV4LLiV2LLiIiV4LLiUc","","avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignd128_mask, "V4iV4iV4iIiV4iUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignd256_mask, "V8iV8iV8iIiV8iUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_broadcastf32x4_256_mask, "V8fV4fV8fUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_broadcasti32x4_256_mask, "V8iV4iV8iUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_broadcastsd256_mask, "V4dV2dV4dUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_broadcastss128_mask, "V4fV4fV4fUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_broadcastss256_mask, "V8fV4fV8fUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_insertf32x4_256_mask, "V8fV8fV4fIiV8fUc","","avx512vl")
+TARGET_BUILTIN(__builtin_ia32_inserti32x4_256_mask, "V8iV8iV4iIiV8iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_movshdup128_mask, "V4fV4fV4fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_movshdup256_mask, "V8fV8fV8fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_movsldup128_mask, "V4fV4fV4fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_movsldup256_mask, "V8fV8fV8fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pbroadcastd128_mask, "V4iV4iV4iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pbroadcastd256_mask, "V8iV4iV8iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pbroadcastq128_mask, "V2LLiV2LLiV2LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pbroadcastq256_mask, "V4LLiV2LLiV4LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_permdf256_mask, "V4dV4dUcV4dUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_permdi256_mask, "V4LLiV4LLiUcV4LLiUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvardf256_mask, "V4dV4dV4LLiV4dUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvardi256_mask, "V4LLiV4LLiV4LLiV4LLiUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvarsf256_mask, "V8fV8fV8iV8fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvarsi256_mask, "V8iV8iV8iV8iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdb128mem_mask, "vV16c*V4iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovdb256_mask, "V16cV8iV16cUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdb256mem_mask, "vV16c*V8iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdw256mem_mask, "vV8s*V8iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2LLiV16cUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, 
"vV16c*V2LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqb256_mask, "V16cV4LLiV16cUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqb256mem_mask, "vV16c*V4LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqd128_mask, "V4iV2LLiV4iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqd128mem_mask, "vV4i*V2LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqd256_mask, "V4iV4LLiV4iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqd256mem_mask, "vV4i*V4LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2LLiV8sUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsdb128_mask, "V16cV4iV16cUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdb128mem_mask, "vV16c*V4iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsdb256_mask, "V16cV8iV16cUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdb256mem_mask, "vV16c*V8iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsdw128_mask, "V8sV4iV8sUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdw128mem_mask, "vV8s*V4iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsdw256_mask, "V8sV8iV8sUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdw256mem_mask, "vV8s*V8iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqb128_mask, "V16cV2LLiV16cUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqb128mem_mask, "vV16c*V2LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqb256_mask, "V16cV4LLiV16cUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqb256mem_mask, "vV16c*V4LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqd128_mask, "V4iV2LLiV4iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqd128mem_mask, "vV4i*V2LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqd256_mask, 
"V4iV4LLiV4iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqd256mem_mask, "vV4i*V4LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqw128_mask, "V8sV2LLiV8sUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqw128mem_mask, "vV8s*V2LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqw256_mask, "V8sV4LLiV8sUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqw256mem_mask, "vV8s*V4LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusdb128_mask, "V16cV4iV16cUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdb128mem_mask, "vV16c*V4iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusdb256_mask, "V16cV8iV16cUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdb256mem_mask, "vV16c*V8iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusdw128_mask, "V8sV4iV8sUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdw128mem_mask, "vV8s*V4iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusdw256_mask, "V8sV8iV8sUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdw256mem_mask, "vV8s*V8iUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2LLiV16cUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqb128mem_mask, "vV16c*V2LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqb256_mask, "V16cV4LLiV16cUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqb256mem_mask, "vV16c*V4LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqd128_mask, "V4iV2LLiV4iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqd128mem_mask, "vV4i*V2LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqd256_mask, "V4iV4LLiV4iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqd256mem_mask, "vV4i*V4LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqw128_mask, "V8sV2LLiV8sUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4LLiV8sUc","","avx512vl") 
+TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pbroadcastb512_mask, "V64cV16cV64cULLi","","avx512f") +TARGET_BUILTIN(__builtin_ia32_pbroadcastw512_gpr_mask, "V32shV32sUi","","avx512bw") +TARGET_BUILTIN(__builtin_ia32_pbroadcastw256_gpr_mask, "V16shV16sUs","","avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_pbroadcastw128_gpr_mask, "V8ssV8sUc","","avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantpd128_mask, "V2dV2diV2dUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantpd256_mask, "V4dV4diV4dUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantps128_mask, "V4fV4fiV4fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantps256_mask, "V8fV8fiV8fUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantpd512_mask, "V8dV8diV8dUcIi","","avx512f") +TARGET_BUILTIN(__builtin_ia32_getmantps512_mask, "V16fV16fiV16fUsIi","","avx512f") +TARGET_BUILTIN(__builtin_ia32_palignr512_mask, "V64cV64cV64ciV64cULLi","","avx512bw") +TARGET_BUILTIN(__builtin_ia32_palignr128_mask, "V16cV16cV16ciV16cUs","","avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_palignr256_mask, "V32cV32cV32ciV32cUi","","avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_pshufd128_mask, "V4iV4iCsV4iUc","","avx512vl") +TARGET_BUILTIN(__builtin_ia32_pshufd256_mask, "V8iV8iCsV8iUc","","avx512vl") #undef BUILTIN #undef TARGET_BUILTIN Index: lib/Headers/avx512bwintrin.h =================================================================== --- lib/Headers/avx512bwintrin.h +++ lib/Headers/avx512bwintrin.h @@ -1880,8 +1880,6 @@ (__mmask32)( __U));\ }) - - static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_mov_epi16 (__m512i __W, __mmask32 __U, __m512i __A) { @@ -1916,7 +1914,6 @@ (__mmask64) __U); } - static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mask_set1_epi8 (__m512i __O, __mmask64 __M, char __A) { @@ -1981,6 +1978,150 @@ _mm512_setzero_hi (), (__mmask64) __U); } + + +#define _mm512_dbsad_epu8( __A, __B, __imm) 
__extension__ ({ \ +__builtin_ia32_dbpsadbw512_mask ((__v64qi)( __A),\ + (__v64qi)( __B),\ + ( __imm),\ + (__v32hi)\ + _mm512_setzero_hi (),\ + (__mmask32) -1);\ +}) + +#define _mm512_mask_dbsad_epu8( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_dbpsadbw512_mask ((__v64qi)( __A),\ + (__v64qi)( __B),\ + ( __imm),\ + (__v32hi)( __W),\ + (__mmask32)( __U));\ +}) + +#define _mm512_maskz_dbsad_epu8( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_dbpsadbw512_mask ((__v64qi)( __A),\ + (__v64qi)( __B),\ + ( __imm),\ + (__v32hi)\ + _mm512_setzero_hi(),\ + (__mmask32)( __U));\ +}) + +#define _mm512_mask_alignr_epi8( __W, __U, __A, __B, __N) __extension__ ({ \ +__builtin_ia32_palignr512_mask ((__v8di)( __A),\ + (__v8di)( __B),\ + (__N) * 8,\ + (__v8di)( __W),\ + (__mmask64)( __U));\ +}) + +#define _mm512_maskz_alignr_epi8( __U, __A, __B, __N) __extension__ ({ \ +__builtin_ia32_palignr512_mask ((__v8di)( __A),\ + (__v8di)( __B),\ + ( __N) * 8,\ + (__v8di)\ + _mm512_setzero_si512 (),\ + (__mmask64)( __U));\ +}) + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_broadcastb_epi8 (__m128i __A) +{ + return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A, + (__v64qi)_mm512_setzero_si512(), + (__mmask64) - + 1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_broadcastb_epi8 (__m512i __O, __mmask64 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A, + (__v64qi) __O, + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_broadcastb_epi8 (__mmask64 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_pbroadcastb512_mask ((__v16qi) __A, + (__v64qi) + _mm512_setzero_qi(), + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_set1_epi16 (__m512i __O, __mmask32 __M, short __A) +{ + return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A, + (__v32hi) __O, + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_set1_epi16 (__mmask32 
__M, short __A) +{ + return (__m512i) __builtin_ia32_pbroadcastw512_gpr_mask (__A, + (__v32hi) + _mm512_setzero_hi(), + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_broadcastw_epi16 (__m128i __A) +{ + return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A, + (__v32hi)_mm512_setzero_si512(), + (__mmask32)-1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_broadcastw_epi16 (__m512i __O, __mmask32 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A, + (__v32hi) __O, + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_broadcastw_epi16 (__mmask32 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_pbroadcastw512_mask ((__v8hi) __A, + (__v32hi) + _mm512_setzero_hi(), + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_permutexvar_epi16 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B, + (__v32hi) __A, + (__v32hi) + _mm512_setzero_hi (), + (__mmask32) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_permutexvar_epi16 (__mmask32 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B, + (__v32hi) __A, + (__v32hi) + _mm512_setzero_hi(), + (__mmask32) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_permutexvar_epi16 (__m512i __W, __mmask32 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_permvarhi512_mask ((__v32hi) __B, + (__v32hi) __A, + (__v32hi) __W, + (__mmask32) __M); +} + #undef __DEFAULT_FN_ATTRS #endif Index: lib/Headers/avx512dqintrin.h =================================================================== --- lib/Headers/avx512dqintrin.h +++ lib/Headers/avx512dqintrin.h @@ -773,6 +773,381 @@ (__m512) __builtin_ia32_reduceps512_mask ((__v16sf) __A, __B, \ (__v16sf) _mm512_setzero_ps(), (__mmask16) __U, __R);}) + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_broadcast_f32x2 (__m128 
__A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+                (__v16sf)_mm512_undefined_ps(),
+                (__mmask16) -
+                1);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_broadcast_f32x2 (__m512 __O, __mmask16 __M, __m128 __A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+                (__v16sf)
+                __O, __M);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcast_f32x2 (__mmask16 __M, __m128 __A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x2_512_mask ((__v4sf) __A,
+                (__v16sf)
+                _mm512_setzero_ps (),
+                __M);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_broadcast_f32x8 (__m256 __A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+                (__v16sf)_mm512_undefined_ps(),
+                (__mmask16) -
+                1);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_mask_broadcast_f32x8 (__m512 __O, __mmask16 __M, __m256 __A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+                (__v16sf)__O,
+                __M);
+}
+
+static __inline__ __m512 __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcast_f32x8 (__mmask16 __M, __m256 __A)
+{
+  return (__m512) __builtin_ia32_broadcastf32x8_512_mask ((__v8sf) __A,
+                (__v16sf)
+                _mm512_setzero_ps (),
+                __M);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_broadcast_f64x2 (__m128d __A)
+{
+  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
+                __A,
+                (__v8df)_mm512_undefined_pd(),
+                (__mmask8) -
+                1);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_mask_broadcast_f64x2 (__m512d __O, __mmask8 __M, __m128d __A)
+{
+  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
+                __A,
+                (__v8df)
+                __O, __M);
+}
+
+static __inline__ __m512d __DEFAULT_FN_ATTRS
+_mm512_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
+{
+  return (__m512d) __builtin_ia32_broadcastf64x2_512_mask ((__v2df)
+                __A,
+                (__v8df)
+                _mm512_setzero_pd (),
+                __M);
+}
+
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_broadcast_i32x2 (__m128i __A)
+{
+
return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) + __A, + (__v16si)_mm512_setzero_si512(), + (__mmask16) + -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_broadcast_i32x2 (__m512i __O, __mmask16 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) + __A, + (__v16si) + __O, __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_broadcast_i32x2 (__mmask16 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_broadcasti32x2_512_mask ((__v4si) + __A, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_broadcast_i32x8 (__m256i __A) +{ + return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) + __A, + (__v16si)_mm512_setzero_si512(), + (__mmask16) + -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_broadcast_i32x8 (__m512i __O, __mmask16 __M, __m256i __A) +{ + return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) + __A, + (__v16si)__O, + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_broadcast_i32x8 (__mmask16 __M, __m256i __A) +{ + return (__m512i) __builtin_ia32_broadcasti32x8_512_mask ((__v8si) + __A, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_broadcast_i64x2 (__m128i __A) +{ + return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) + __A, + _mm512_setzero_si512(), + (__mmask8) - + 1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_broadcast_i64x2 (__m512i __O, __mmask8 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) + __A, + (__v8di) + __O, __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) +{ + return (__m512i) __builtin_ia32_broadcasti64x2_512_mask ((__v2di) + __A, + (__v8di) + _mm512_setzero_si512 (), + __M); +} + +#define _mm512_extractf32x8_ps( __A, __imm) 
__extension__ ({ \ +__builtin_ia32_extractf32x8_mask ((__v16sf)( __A),\ + ( __imm),\ + (__v8sf)\ + _mm256_setzero_ps (),\ + (__mmask8) -1);\ +}) + +#define _mm512_mask_extractf32x8_ps( __W, __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extractf32x8_mask ((__v16sf)( __A),\ + ( __imm),\ + (__v8sf)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm512_maskz_extractf32x8_ps( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extractf32x8_mask ((__v16sf)( __A),\ + ( __imm),\ + (__v8sf)\ + _mm256_setzero_ps (),\ + (__mmask8)( __U));\ +}) + +#define _mm512_extractf64x2_pd( __A, __imm) __extension__ ({ \ +__builtin_ia32_extractf64x2_512_mask ((__v8df)( __A),\ + ( __imm),\ + (__v2df)\ + _mm_setzero_pd (),\ + (__mmask8) -\ + 1);\ +}) + +#define _mm512_mask_extractf64x2_pd( __W, __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extractf64x2_512_mask ((__v8df)( __A),\ + ( __imm),\ + (__v2df)( __W),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm512_maskz_extractf64x2_pd( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extractf64x2_512_mask ((__v8df)( __A),\ + ( __imm),\ + (__v2df)\ + _mm_setzero_pd (),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm512_extracti32x8_epi32( __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti32x8_mask ((__v16si)( __A),\ + ( __imm),\ + (__v8si)\ + _mm256_setzero_si256 (),\ + (__mmask8) -1);\ +}) + +#define _mm512_mask_extracti32x8_epi32( __W, __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti32x8_mask ((__v16si)( __A),\ + ( __imm),\ + (__v8si)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm512_maskz_extracti32x8_epi32( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti32x8_mask ((__v16si)( __A),\ + ( __imm),\ + (__v8si)\ + _mm256_setzero_si256 (),\ + (__mmask8)( __U));\ +}) + +#define _mm512_extracti64x2_epi64( __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti64x2_512_mask ((__v8di)( __A),\ + ( __imm),\ + (__v2di)\ + _mm_setzero_di (),\ + (__mmask8) -\ + 1);\ +}) + +#define _mm512_mask_extracti64x2_epi64( __W, 
__U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti64x2_512_mask ((__v8di)( __A),\ + ( __imm),\ + (__v2di)( __W),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm512_maskz_extracti64x2_epi64( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti64x2_512_mask ((__v8di)( __A),\ + ( __imm),\ + (__v2di)\ + _mm_setzero_di (),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm512_mask_fpclass_pd_mask( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_fpclasspd512_mask ((__v8df)( __A),\ + ( __imm),( __U));\ +}) + +#define _mm512_fpclass_pd_mask( __A, __imm) __extension__ ({ \ +__builtin_ia32_fpclasspd512_mask ((__v8df)( __A),\ + ( __imm),\ + (__mmask8) -1);\ +}) + +#define _mm512_mask_fpclass_ps_mask( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_fpclassps512_mask ((__v16sf)( __A),\ + ( __imm),\ + ( __U));\ +}) + +#define _mm512_fpclass_ps_mask( __A, __imm) __extension__ ({ \ +__builtin_ia32_fpclassps512_mask ((__v16sf)( __A),\ + ( __imm),\ + (__mmask16) -\ + 1);\ +}) + +#define _mm512_insertf64x2( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_insertf64x2_512_mask ((__v8df)( __A),\ + (__v2df)( __B),\ + ( __imm),\ + (__v8df)\ + _mm512_setzero_pd (),\ + (__mmask8) -\ + 1);\ +}) + +#define _mm512_mask_insertf64x2( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_insertf64x2_512_mask ((__v8df)( __A),\ + (__v2df)( __B),\ + ( __imm),\ + (__v8df)( __W),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm512_maskz_insertf64x2( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_insertf64x2_512_mask ((__v8df)( __A),\ + (__v2df)( __B),\ + ( __imm),\ + (__v8df)\ + _mm512_setzero_pd (),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm512_inserti32x8( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_inserti32x8_mask ((__v16si)( __A),\ + (__v8si)( __B),\ + ( __imm),\ + (__v16si)\ + _mm512_setzero_si512 (),\ + (__mmask16) -1);\ +}) + +#define _mm512_mask_inserti32x8( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_inserti32x8_mask 
((__v16si)( __A),\ + (__v8si)( __B),\ + ( __imm),\ + (__v16si)( __W),\ + (__mmask16)( __U));\ +}) + +#define _mm512_maskz_inserti32x8( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_inserti32x8_mask ((__v16si)( __A),\ + (__v8si)( __B),\ + ( __imm),\ + (__v16si)\ + _mm512_setzero_si512 (),\ + (__mmask16)( __U));\ +}) + +#define _mm512_inserti64x2( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_inserti64x2_512_mask ((__v8di)( __A),\ + (__v2di)( __B),\ + ( __imm),\ + (__v8di)\ + _mm512_setzero_si512 (),\ + (__mmask8) -\ + 1);\ +}) + +#define _mm512_mask_inserti64x2( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_inserti64x2_512_mask ((__v8di)( __A),\ + (__v2di)( __B),\ + ( __imm),\ + (__v8di)( __W),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm512_maskz_inserti64x2( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_inserti64x2_512_mask ((__v8di)( __A),\ + (__v2di)( __B),\ + ( __imm),\ + (__v8di)\ + _mm512_setzero_si512 (),\ + (__mmask8)\ + ( __U));\ +}) + #undef __DEFAULT_FN_ATTRS #endif Index: lib/Headers/avx512fintrin.h =================================================================== --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -48,6 +48,96 @@ typedef enum { + _MM_PERM_AAAA = 0x00, _MM_PERM_AAAB = 0x01, _MM_PERM_AAAC = 0x02, + _MM_PERM_AAAD = 0x03, _MM_PERM_AABA = 0x04, _MM_PERM_AABB = 0x05, + _MM_PERM_AABC = 0x06, _MM_PERM_AABD = 0x07, _MM_PERM_AACA = 0x08, + _MM_PERM_AACB = 0x09, _MM_PERM_AACC = 0x0A, _MM_PERM_AACD = 0x0B, + _MM_PERM_AADA = 0x0C, _MM_PERM_AADB = 0x0D, _MM_PERM_AADC = 0x0E, + _MM_PERM_AADD = 0x0F, _MM_PERM_ABAA = 0x10, _MM_PERM_ABAB = 0x11, + _MM_PERM_ABAC = 0x12, _MM_PERM_ABAD = 0x13, _MM_PERM_ABBA = 0x14, + _MM_PERM_ABBB = 0x15, _MM_PERM_ABBC = 0x16, _MM_PERM_ABBD = 0x17, + _MM_PERM_ABCA = 0x18, _MM_PERM_ABCB = 0x19, _MM_PERM_ABCC = 0x1A, + _MM_PERM_ABCD = 0x1B, _MM_PERM_ABDA = 0x1C, _MM_PERM_ABDB = 0x1D, + _MM_PERM_ABDC = 0x1E, _MM_PERM_ABDD = 0x1F, _MM_PERM_ACAA = 0x20, + 
_MM_PERM_ACAB = 0x21, _MM_PERM_ACAC = 0x22, _MM_PERM_ACAD = 0x23, + _MM_PERM_ACBA = 0x24, _MM_PERM_ACBB = 0x25, _MM_PERM_ACBC = 0x26, + _MM_PERM_ACBD = 0x27, _MM_PERM_ACCA = 0x28, _MM_PERM_ACCB = 0x29, + _MM_PERM_ACCC = 0x2A, _MM_PERM_ACCD = 0x2B, _MM_PERM_ACDA = 0x2C, + _MM_PERM_ACDB = 0x2D, _MM_PERM_ACDC = 0x2E, _MM_PERM_ACDD = 0x2F, + _MM_PERM_ADAA = 0x30, _MM_PERM_ADAB = 0x31, _MM_PERM_ADAC = 0x32, + _MM_PERM_ADAD = 0x33, _MM_PERM_ADBA = 0x34, _MM_PERM_ADBB = 0x35, + _MM_PERM_ADBC = 0x36, _MM_PERM_ADBD = 0x37, _MM_PERM_ADCA = 0x38, + _MM_PERM_ADCB = 0x39, _MM_PERM_ADCC = 0x3A, _MM_PERM_ADCD = 0x3B, + _MM_PERM_ADDA = 0x3C, _MM_PERM_ADDB = 0x3D, _MM_PERM_ADDC = 0x3E, + _MM_PERM_ADDD = 0x3F, _MM_PERM_BAAA = 0x40, _MM_PERM_BAAB = 0x41, + _MM_PERM_BAAC = 0x42, _MM_PERM_BAAD = 0x43, _MM_PERM_BABA = 0x44, + _MM_PERM_BABB = 0x45, _MM_PERM_BABC = 0x46, _MM_PERM_BABD = 0x47, + _MM_PERM_BACA = 0x48, _MM_PERM_BACB = 0x49, _MM_PERM_BACC = 0x4A, + _MM_PERM_BACD = 0x4B, _MM_PERM_BADA = 0x4C, _MM_PERM_BADB = 0x4D, + _MM_PERM_BADC = 0x4E, _MM_PERM_BADD = 0x4F, _MM_PERM_BBAA = 0x50, + _MM_PERM_BBAB = 0x51, _MM_PERM_BBAC = 0x52, _MM_PERM_BBAD = 0x53, + _MM_PERM_BBBA = 0x54, _MM_PERM_BBBB = 0x55, _MM_PERM_BBBC = 0x56, + _MM_PERM_BBBD = 0x57, _MM_PERM_BBCA = 0x58, _MM_PERM_BBCB = 0x59, + _MM_PERM_BBCC = 0x5A, _MM_PERM_BBCD = 0x5B, _MM_PERM_BBDA = 0x5C, + _MM_PERM_BBDB = 0x5D, _MM_PERM_BBDC = 0x5E, _MM_PERM_BBDD = 0x5F, + _MM_PERM_BCAA = 0x60, _MM_PERM_BCAB = 0x61, _MM_PERM_BCAC = 0x62, + _MM_PERM_BCAD = 0x63, _MM_PERM_BCBA = 0x64, _MM_PERM_BCBB = 0x65, + _MM_PERM_BCBC = 0x66, _MM_PERM_BCBD = 0x67, _MM_PERM_BCCA = 0x68, + _MM_PERM_BCCB = 0x69, _MM_PERM_BCCC = 0x6A, _MM_PERM_BCCD = 0x6B, + _MM_PERM_BCDA = 0x6C, _MM_PERM_BCDB = 0x6D, _MM_PERM_BCDC = 0x6E, + _MM_PERM_BCDD = 0x6F, _MM_PERM_BDAA = 0x70, _MM_PERM_BDAB = 0x71, + _MM_PERM_BDAC = 0x72, _MM_PERM_BDAD = 0x73, _MM_PERM_BDBA = 0x74, + _MM_PERM_BDBB = 0x75, _MM_PERM_BDBC = 0x76, _MM_PERM_BDBD = 0x77, + _MM_PERM_BDCA = 0x78, 
_MM_PERM_BDCB = 0x79, _MM_PERM_BDCC = 0x7A, + _MM_PERM_BDCD = 0x7B, _MM_PERM_BDDA = 0x7C, _MM_PERM_BDDB = 0x7D, + _MM_PERM_BDDC = 0x7E, _MM_PERM_BDDD = 0x7F, _MM_PERM_CAAA = 0x80, + _MM_PERM_CAAB = 0x81, _MM_PERM_CAAC = 0x82, _MM_PERM_CAAD = 0x83, + _MM_PERM_CABA = 0x84, _MM_PERM_CABB = 0x85, _MM_PERM_CABC = 0x86, + _MM_PERM_CABD = 0x87, _MM_PERM_CACA = 0x88, _MM_PERM_CACB = 0x89, + _MM_PERM_CACC = 0x8A, _MM_PERM_CACD = 0x8B, _MM_PERM_CADA = 0x8C, + _MM_PERM_CADB = 0x8D, _MM_PERM_CADC = 0x8E, _MM_PERM_CADD = 0x8F, + _MM_PERM_CBAA = 0x90, _MM_PERM_CBAB = 0x91, _MM_PERM_CBAC = 0x92, + _MM_PERM_CBAD = 0x93, _MM_PERM_CBBA = 0x94, _MM_PERM_CBBB = 0x95, + _MM_PERM_CBBC = 0x96, _MM_PERM_CBBD = 0x97, _MM_PERM_CBCA = 0x98, + _MM_PERM_CBCB = 0x99, _MM_PERM_CBCC = 0x9A, _MM_PERM_CBCD = 0x9B, + _MM_PERM_CBDA = 0x9C, _MM_PERM_CBDB = 0x9D, _MM_PERM_CBDC = 0x9E, + _MM_PERM_CBDD = 0x9F, _MM_PERM_CCAA = 0xA0, _MM_PERM_CCAB = 0xA1, + _MM_PERM_CCAC = 0xA2, _MM_PERM_CCAD = 0xA3, _MM_PERM_CCBA = 0xA4, + _MM_PERM_CCBB = 0xA5, _MM_PERM_CCBC = 0xA6, _MM_PERM_CCBD = 0xA7, + _MM_PERM_CCCA = 0xA8, _MM_PERM_CCCB = 0xA9, _MM_PERM_CCCC = 0xAA, + _MM_PERM_CCCD = 0xAB, _MM_PERM_CCDA = 0xAC, _MM_PERM_CCDB = 0xAD, + _MM_PERM_CCDC = 0xAE, _MM_PERM_CCDD = 0xAF, _MM_PERM_CDAA = 0xB0, + _MM_PERM_CDAB = 0xB1, _MM_PERM_CDAC = 0xB2, _MM_PERM_CDAD = 0xB3, + _MM_PERM_CDBA = 0xB4, _MM_PERM_CDBB = 0xB5, _MM_PERM_CDBC = 0xB6, + _MM_PERM_CDBD = 0xB7, _MM_PERM_CDCA = 0xB8, _MM_PERM_CDCB = 0xB9, + _MM_PERM_CDCC = 0xBA, _MM_PERM_CDCD = 0xBB, _MM_PERM_CDDA = 0xBC, + _MM_PERM_CDDB = 0xBD, _MM_PERM_CDDC = 0xBE, _MM_PERM_CDDD = 0xBF, + _MM_PERM_DAAA = 0xC0, _MM_PERM_DAAB = 0xC1, _MM_PERM_DAAC = 0xC2, + _MM_PERM_DAAD = 0xC3, _MM_PERM_DABA = 0xC4, _MM_PERM_DABB = 0xC5, + _MM_PERM_DABC = 0xC6, _MM_PERM_DABD = 0xC7, _MM_PERM_DACA = 0xC8, + _MM_PERM_DACB = 0xC9, _MM_PERM_DACC = 0xCA, _MM_PERM_DACD = 0xCB, + _MM_PERM_DADA = 0xCC, _MM_PERM_DADB = 0xCD, _MM_PERM_DADC = 0xCE, + _MM_PERM_DADD = 0xCF, _MM_PERM_DBAA = 0xD0, 
_MM_PERM_DBAB = 0xD1, + _MM_PERM_DBAC = 0xD2, _MM_PERM_DBAD = 0xD3, _MM_PERM_DBBA = 0xD4, + _MM_PERM_DBBB = 0xD5, _MM_PERM_DBBC = 0xD6, _MM_PERM_DBBD = 0xD7, + _MM_PERM_DBCA = 0xD8, _MM_PERM_DBCB = 0xD9, _MM_PERM_DBCC = 0xDA, + _MM_PERM_DBCD = 0xDB, _MM_PERM_DBDA = 0xDC, _MM_PERM_DBDB = 0xDD, + _MM_PERM_DBDC = 0xDE, _MM_PERM_DBDD = 0xDF, _MM_PERM_DCAA = 0xE0, + _MM_PERM_DCAB = 0xE1, _MM_PERM_DCAC = 0xE2, _MM_PERM_DCAD = 0xE3, + _MM_PERM_DCBA = 0xE4, _MM_PERM_DCBB = 0xE5, _MM_PERM_DCBC = 0xE6, + _MM_PERM_DCBD = 0xE7, _MM_PERM_DCCA = 0xE8, _MM_PERM_DCCB = 0xE9, + _MM_PERM_DCCC = 0xEA, _MM_PERM_DCCD = 0xEB, _MM_PERM_DCDA = 0xEC, + _MM_PERM_DCDB = 0xED, _MM_PERM_DCDC = 0xEE, _MM_PERM_DCDD = 0xEF, + _MM_PERM_DDAA = 0xF0, _MM_PERM_DDAB = 0xF1, _MM_PERM_DDAC = 0xF2, + _MM_PERM_DDAD = 0xF3, _MM_PERM_DDBA = 0xF4, _MM_PERM_DDBB = 0xF5, + _MM_PERM_DDBC = 0xF6, _MM_PERM_DDBD = 0xF7, _MM_PERM_DDCA = 0xF8, + _MM_PERM_DDCB = 0xF9, _MM_PERM_DDCC = 0xFA, _MM_PERM_DDCD = 0xFB, + _MM_PERM_DDDA = 0xFC, _MM_PERM_DDDB = 0xFD, _MM_PERM_DDDC = 0xFE, + _MM_PERM_DDDD = 0xFF +} _MM_PERM_ENUM; + +typedef enum +{ _MM_MANT_NORM_1_2, /* interval [1, 2) */ _MM_MANT_NORM_p5_2, /* interval [0.5, 2) */ _MM_MANT_NORM_p5_1, /* interval [0.5, 1) */ @@ -3975,6 +4065,1251 @@ }) + +#define _mm_cmp_round_sd_mask( __X, __Y, __P,__R) __extension__ ({ \ +__builtin_ia32_cmpsd_mask ((__v2df)( __X),\ + (__v2df)( __Y), __P,\ + (__mmask8) -1, __R);\ +}) + +#define _mm_mask_cmp_round_sd_mask( __M, __X, __Y, __P, __R) __extension__ ({ \ +__builtin_ia32_cmpsd_mask ((__v2df)( __X),\ + (__v2df)( __Y), __P,\ + (__mmask8)( __M), __R);\ +}) + +#define _mm_cmp_sd_mask( __X, __Y, __P) __extension__ ({ \ +__builtin_ia32_cmpsd_mask ((__v2df)( __X),\ + (__v2df)( __Y),( __P),\ + (__mmask8) -1,\ + _MM_FROUND_CUR_DIRECTION);\ +}) + +#define _mm_mask_cmp_sd_mask( __M, __X, __Y, __P) __extension__ ({ \ +__builtin_ia32_cmpsd_mask ((__v2df)( __X),\ + (__v2df)( __Y),( __P),\ + (__mmask8)( __M),\ + _MM_FROUND_CUR_DIRECTION);\ +}) + 
+#define _mm_cmp_round_ss_mask( __X, __Y, __P, __R) __extension__ ({ \ +__builtin_ia32_cmpss_mask ((__v4sf)( __X),\ + (__v4sf)( __Y), __P,\ + (__mmask8) -1, __R);\ +}) + +#define _mm_mask_cmp_round_ss_mask( __M, __X, __Y, __P, __R) __extension__ ({ \ +__builtin_ia32_cmpss_mask ((__v4sf)( __X),\ + (__v4sf)( __Y), __P,\ + (__mmask8)( __M), __R);\ +}) + +#define _mm_cmp_ss_mask( __X, __Y, __P) __extension__ ({ \ +__builtin_ia32_cmpss_mask ((__v4sf)( __X),\ + (__v4sf)( __Y),( __P),\ + (__mmask8) -1,\ + _MM_FROUND_CUR_DIRECTION);\ +}) + +#define _mm_mask_cmp_ss_mask( __M, __X, __Y, __P) __extension__ ({ \ +__builtin_ia32_cmpss_mask ((__v4sf)( __X),\ + (__v4sf)( __Y),( __P),\ + (__mmask8)( __M),\ + _MM_FROUND_CUR_DIRECTION);\ +}) + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) 
__builtin_ia32_compresssf512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) __U); +} + +#define _mm512_cvt_roundps_pd( __A, __R) __extension__ ({ \ +__builtin_ia32_cvtps2pd512_mask ((__v8sf)( __A),\ + (__v8df)\ + _mm512_undefined_pd (),\ + (__mmask8) -1,( __R));\ +}) + +#define _mm512_mask_cvt_roundps_pd( __W, __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvtps2pd512_mask ((__v8sf)( __A),\ + (__v8df)( __W),\ + (__mmask8)( __U),( __R));\ +}) + +#define _mm512_maskz_cvt_roundps_pd( __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvtps2pd512_mask ((__v8sf)( __A),\ + (__v8df)\ + _mm512_setzero_pd (),\ + (__mmask8)( __U),( __R));\ +}) + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_cvtps_pd (__m256 __A) +{ + return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, + (__v8df) + _mm512_undefined_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) +{ + return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) +{ + return (__m512d) __builtin_ia32_cvtps2pd512_mask ((__v8sf) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_cvtt_roundpd_epu32( __A, __R) __extension__ ({ \ +__builtin_ia32_cvttpd2udq512_mask ((__v8df)( __A),\ + (__v8si)\ 
+ _mm256_undefined_si256 (),\ + (__mmask8) -1,( __R));\ +}) + +#define _mm512_mask_cvtt_roundpd_epu32( __W, __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvttpd2udq512_mask ((__v8df)( __A),\ + (__v8si)( __W),\ + (__mmask8)( __U),( __R));\ +}) + +#define _mm512_maskz_cvtt_roundpd_epu32( __U, __A, __R) __extension__ ({ \ +__builtin_ia32_cvttpd2udq512_mask ((__v8df)( __A),\ + (__v8si)\ + _mm256_setzero_si256 (),\ + (__mmask8)( __U),( __R));\ +}) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_cvttpd_epu32 (__m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, + (__v8si) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) +{ + return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, + (__v8si) + _mm256_setzero_si256 (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, + (__v8di) __W, + (__mmask8) __U); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) +{ + return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void 
const *__P) +{ + return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, + (__v8di) __W, + (__mmask8) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) +{ + return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) +{ + return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, + (__v16si) __W, + (__mmask16) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) +{ + return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, + (__v16si) __W, + (__mmask16) __U); +} + +#define _mm512_extracti32x4_epi32( __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti32x4_mask ((__v16si)( __A),\ + ( __imm),\ + (__v4si)\ + _mm_undefined_si128 (),\ + (__mmask8) -1);\ +}) + +#define _mm512_mask_extracti32x4_epi32( __W, __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti32x4_mask ((__v16si)( __A),\ + ( __imm),\ + (__v4si)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm512_maskz_extracti32x4_epi32( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti32x4_mask ((__v16si)( __A),\ + ( __imm),\ + (__v4si)\ + _mm_setzero_si128 (),\ + (__mmask8)( __U));\ +}) + +#define _mm512_extracti64x4_epi64( __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti64x4_mask ((__v8di)( __A),\ + ( __imm),\ + (__v4di)\ + _mm256_undefined_si256 (),\ + (__mmask8) -1);\ +}) + +#define _mm512_mask_extracti64x4_epi64( __W, __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti64x4_mask ((__v8di)( __A),\ + ( 
__imm),\ + (__v4di)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm512_maskz_extracti64x4_epi64( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti64x4_mask ((__v8di)( __A),\ + ( __imm),\ + (__v4di)\ + _mm256_setzero_si256 (),\ + (__mmask8)( __U));\ +}) + +#define _mm512_getexp_round_pd( __A, __R) __extension__ ({ \ +__builtin_ia32_getexppd512_mask ((__v8df)( __A),\ + (__v8df)\ + _mm512_undefined_pd (),\ + (__mmask8) -1,( __R));\ +}) + +#define _mm512_mask_getexp_round_pd( __W, __U, __A, __R) __extension__ ({ \ +__builtin_ia32_getexppd512_mask ((__v8df)( __A),\ + (__v8df)( __W),\ + (__mmask8)( __U),( __R));\ +}) + +#define _mm512_maskz_getexp_round_pd( __U, __A, __R) __extension__ ({ \ +__builtin_ia32_getexppd512_mask ((__v8df)( __A),\ + (__v8df)\ + _mm512_setzero_pd (),\ + (__mmask8)( __U),( __R));\ +}) + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_getexp_pd (__m512d __A) +{ + return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, + (__v8df) + _mm512_undefined_pd (), + (__mmask8) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, + (__v8df) __W, + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) +{ + return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_getexp_round_ps( __A, __R) __extension__ ({ \ +__builtin_ia32_getexpps512_mask ((__v16sf)( __A),\ + (__v16sf)\ + _mm512_undefined_ps (),\ + (__mmask16) -1,( __R));\ +}) + +#define _mm512_mask_getexp_round_ps( __W, __U, __A, __R) __extension__ ({ \ +__builtin_ia32_getexpps512_mask ((__v16sf)( __A),\ + (__v16sf)( __W),\ + (__mmask16)( __U),( __R));\ +}) + +#define _mm512_maskz_getexp_round_ps( __U, __A, __R) 
__extension__ ({ \ +__builtin_ia32_getexpps512_mask ((__v16sf)( __A),\ + (__v16sf)\ + _mm512_setzero_ps (),\ + (__mmask16)( __U),( __R));\ +}) + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_getexp_ps (__m512 __A) +{ + return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_undefined_ps (), + (__mmask16) -1, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U, + _MM_FROUND_CUR_DIRECTION); +} + +#define _mm512_getmant_round_pd( __A, __B, __C, __R) __extension__ ({ \ +__builtin_ia32_getmantpd512_mask ((__v8df)( __A),\ + (__C << 2) |( __B),\ + _mm512_undefined_pd (),\ + (__mmask8) -1,( __R));\ +}) + +#define _mm512_mask_getmant_round_pd( __W, __U, __A, __B, __C, __R) __extension__ ({ \ +__builtin_ia32_getmantpd512_mask ((__v8df)( __A),\ + (__C << 2) |( __B),\ + (__v8df)( __W),( __U),\ + ( __R));\ +}) + +#define _mm512_maskz_getmant_round_pd( __U, __A, __B, __C, __R) __extension__ ({ \ +__builtin_ia32_getmantpd512_mask ((__v8df)( __A),\ + (__C << 2) |( __B),\ + (__v8df)\ + _mm512_setzero_pd (),\ + ( __U),( __R));\ +}) + +#define _mm512_getmant_pd( __A, __B, __C) __extension__ ({ \ +__builtin_ia32_getmantpd512_mask ((__v8df)( __A),\ + (__C << 2) |( __B),\ + (__v8df) _mm512_setzero_pd (),\ + (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);\ +}) + +#define _mm512_mask_getmant_pd( __W, __U, __A, __B, __C) __extension__ ({ \ +__builtin_ia32_getmantpd512_mask ((__v8df)( __A),\ + (__C << 2) |( __B),\ + (__v8df)( __W), ( __U), _MM_FROUND_CUR_DIRECTION);\ +}) + +#define _mm512_maskz_getmant_pd( __U, __A, __B, 
__C) __extension__ ({ \ +__builtin_ia32_getmantpd512_mask ((__v8df)( __A),\ + (__C << 2) |( __B),\ + (__v8df)\ + _mm512_setzero_pd (),\ + ( __U), _MM_FROUND_CUR_DIRECTION);\ +}) + +#define _mm512_getmant_round_ps( __A, __B, __C, __R) __extension__ ({ \ +__builtin_ia32_getmantps512_mask ((__v16sf)( __A),\ + (__C << 2) |( __B),\ + _mm512_undefined_ps (),\ + (__mmask16) -1,( __R));\ +}) + +#define _mm512_mask_getmant_round_ps( __W, __U, __A, __B, __C, __R) __extension__ ({ \ +__builtin_ia32_getmantps512_mask ((__v16sf)( __A),\ + (__C << 2) |( __B),\ + (__v16sf)( __W),( __U),\ + ( __R));\ +}) + +#define _mm512_maskz_getmant_round_ps( __U, __A, __B, __C, __R) __extension__ ({ \ +__builtin_ia32_getmantps512_mask ((__v16sf)( __A),\ + (__C << 2) |( __B),\ + (__v16sf)\ + _mm512_setzero_ps (),\ + ( __U),( __R));\ +}) + +#define _mm512_getmant_ps( __A, __B, __C) __extension__ ({ \ +__builtin_ia32_getmantps512_mask ((__v16sf)( __A),\ + (__C << 2) |( __B),\ + _mm512_undefined_ps (),\ + (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);\ +}) + +#define _mm512_mask_getmant_ps( __W, __U, __A, __B, __C) __extension__ ({ \ +__builtin_ia32_getmantps512_mask ((__v16sf)( __A),\ + (__C << 2) |( __B),\ + (__v16sf)( __W),( __U),\ + _MM_FROUND_CUR_DIRECTION);\ +}) + +#define _mm512_maskz_getmant_ps( __U, __A, __B, __C) __extension__ ({ \ +__builtin_ia32_getmantps512_mask ((__v16sf)( __A),\ + (__C << 2) |( __B),\ + (__v16sf)\ + _mm512_setzero_ps (),\ + ( __U),_MM_FROUND_CUR_DIRECTION);\ +}) + +#define _mm512_insertf64x4( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_insertf64x4_mask ((__v8df)( __A),\ + (__v4df)( __B),\ + ( __imm),\ + (__v8df)\ + _mm512_undefined_pd (),\ + (__mmask8) -1);\ +}) + +#define _mm512_mask_insertf64x4( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_insertf64x4_mask ((__v8df)( __A),\ + (__v4df)( __B),\ + ( __imm),\ + (__v8df)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm512_maskz_insertf64x4( __U, __A, __B, __imm) __extension__ ({ \ 
+__builtin_ia32_insertf64x4_mask ((__v8df)( __A),\ + (__v4df)( __B),\ + ( __imm),\ + (__v8df)\ + _mm512_setzero_pd (),\ + (__mmask8)( __U));\ +}) + +#define _mm512_inserti64x4( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_inserti64x4_mask ((__v8di)( __A),\ + (__v4di)( __B),\ + ( __imm),\ + (__v8di)\ + _mm512_setzero_si512 (),\ + (__mmask8) -1);\ +}) + +#define _mm512_mask_inserti64x4( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_inserti64x4_mask ((__v8di)( __A),\ + (__v4di)( __B),\ + ( __imm),\ + (__v8di)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm512_maskz_inserti64x4( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_inserti64x4_mask ((__v8di)( __A),\ + (__v4di)( __B),\ + ( __imm),\ + (__v8di)\ + _mm512_setzero_si512 (),\ + (__mmask8)( __U));\ +}) + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_movehdup_ps (__m512 __A) +{ + return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, + (__v16sf) + _mm512_undefined_ps (), + (__mmask16) -1); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_movshdup512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_moveldup_ps (__m512 __A) +{ + return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, + (__v16sf) + _mm512_undefined_ps (), + (__mmask16) -1); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) +{ + return (__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) +{ + return 
(__m512) __builtin_ia32_movsldup512_mask ((__v16sf) __A, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +#define _mm512_permutex_pd( __X, __M) __extension__ ({ \ +__builtin_ia32_permdf512_mask ((__v8df)( __X),( __M),\ + (__v8df)\ + _mm512_undefined_pd (),\ + (__mmask8) -1);\ +}) + +#define _mm512_mask_permutex_pd( __W, __U, __X, __M) __extension__ ({ \ +__builtin_ia32_permdf512_mask ((__v8df)( __X),( __M),\ + (__v8df)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm512_maskz_permutex_pd( __U, __X, __M) __extension__ ({ \ +__builtin_ia32_permdf512_mask ((__v8df)( __X),( __M),\ + (__v8df)\ + _mm512_setzero_pd (),\ + (__mmask8)( __U));\ +}) + +#define _mm512_permutex_epi64( __X, __I) __extension__ ({ \ +__builtin_ia32_permdi512_mask ((__v8di)( __X),( __I),\ + (__v8di)\ + _mm512_setzero_si512 (),\ + (__mmask8) (-1));\ +}) + +#define _mm512_mask_permutex_epi64( __W, __M, __X, __I) __extension__ ({ \ +__builtin_ia32_permdi512_mask ((__v8di)( __X),( __I),\ + (__v8di)( __W),\ + (__mmask8)( __M));\ +}) + +#define _mm512_maskz_permutex_epi64( __M, __X, __I) __extension__ ({ \ +__builtin_ia32_permdi512_mask ((__v8di)( __X),( __I),\ + (__v8di)\ + _mm512_setzero_si512 (),\ + (__mmask8)( __M));\ +}) + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_permutexvar_pd (__m512i __X, __m512d __Y) +{ + return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, + (__v8di) __X, + (__v8df) + _mm512_undefined_pd (), + (__mmask8) -1); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) +{ + return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, + (__v8di) __X, + (__v8df) __W, + (__mmask8) __U); +} + +static __inline__ __m512d __DEFAULT_FN_ATTRS +_mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) +{ + return (__m512d) __builtin_ia32_permvardf512_mask ((__v8df) __Y, + (__v8di) __X, + (__v8df) + _mm512_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ 
__m512i __DEFAULT_FN_ATTRS +_mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, + (__v8di) __X, + (__v8di) + _mm512_setzero_si512 (), + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, + (__v8di) __X, + (__v8di) + _mm512_setzero_si512 (), + (__mmask8) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, + __m512i __Y) +{ + return (__m512i) __builtin_ia32_permvardi512_mask ((__v8di) __Y, + (__v8di) __X, + (__v8di) __W, + __M); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_permutexvar_ps (__m512i __X, __m512 __Y) +{ + return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, + (__v16si) __X, + (__v16sf) + _mm512_undefined_ps (), + (__mmask16) -1); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) +{ + return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, + (__v16si) __X, + (__v16sf) __W, + (__mmask16) __U); +} + +static __inline__ __m512 __DEFAULT_FN_ATTRS +_mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) +{ + return (__m512) __builtin_ia32_permvarsf512_mask ((__v16sf) __Y, + (__v16si) __X, + (__v16sf) + _mm512_setzero_ps (), + (__mmask16) __U); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, + (__v16si) __X, + (__v16si) + _mm512_setzero_si512 (), + __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) +{ + return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y, + (__v16si) __X, + (__v16si) + _mm512_setzero_si512 (), + (__mmask16) -1); +} 
+/* Masked permutexvar_epi32: calls __builtin_ia32_permvarsi512_mask with __Y first and __X second, __W as the pass-through vector and __M as the mask. */
+static __inline__ __m512i __DEFAULT_FN_ATTRS
+_mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
+             __m512i __Y)
+{
+  return (__m512i) __builtin_ia32_permvarsi512_mask ((__v16si) __Y,
+                  (__v16si) __X,
+                  (__v16si) __W,
+                  __M);
+}
+/* cvtepi32_epi8: __builtin_ia32_pmovdb512_mask on __A with an undefined __v16qi pass-through and an all-ones mask; result returned as __m128i. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_cvtepi32_epi8 (__m512i __A)
+{
+  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+              (__v16qi)
+              _mm_undefined_si128 (),
+              (__mmask16) -1);
+}
+/* mask_ form: same builtin, with __O as the pass-through vector and __M as the mask. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
+{
+  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+              (__v16qi) __O, __M);
+}
+/* maskz_ form: same builtin, with a zero vector as the pass-through and __M as the mask. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
+{
+  return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
+              (__v16qi)
+              _mm_setzero_si128 (),
+              __M);
+}
+/* storeu form: hands __P (cast to __v16qi *), __A and __M to __builtin_ia32_pmovdb512mem_mask; returns nothing. */
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
+{
+  __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
+}
+/* cvtepi32_epi16: __builtin_ia32_pmovdw512_mask on __A with an undefined __v16hi pass-through and an all-ones mask; result returned as __m256i. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtepi32_epi16 (__m512i __A)
+{
+  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+              (__v16hi)
+              _mm256_undefined_si256 (),
+              (__mmask16) -1);
+}
+/* mask_ form: pass-through __O, mask __M. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
+{
+  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+              (__v16hi) __O, __M);
+}
+/* maskz_ form: zero pass-through, mask __M. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
+{
+  return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
+              (__v16hi)
+              _mm256_setzero_si256 (),
+              __M);
+}
+/* storeu form: hands __P (cast to __v16hi *), __A and __M to __builtin_ia32_pmovdw512mem_mask. */
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
+{
+  __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
+}
+/* cvtepi64_epi8: __builtin_ia32_pmovqb512_mask on __A (as __v8di) with an undefined __v16qi pass-through and an all-ones __mmask8. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_cvtepi64_epi8 (__m512i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
+              (__v16qi)
+              _mm_undefined_si128 (),
+              (__mmask8) -1);
+}
+/* mask_ form: pass-through __O, mask __M. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
+              (__v16qi) __O, __M);
+}
+/* maskz_ form: zero pass-through, mask __M. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
+              (__v16qi)
+              _mm_setzero_si128 (),
+              __M);
+}
+/* storeu form: hands __P (cast to __v16qi *), __A and __M to __builtin_ia32_pmovqb512mem_mask. */
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
+{
+  __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
+}
+/* cvtepi64_epi32: __builtin_ia32_pmovqd512_mask on __A with an undefined __v8si pass-through and an all-ones __mmask8; result returned as __m256i. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtepi64_epi32 (__m512i __A)
+{
+  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+              (__v8si)
+              _mm256_undefined_si256 (),
+              (__mmask8) -1);
+}
+/* mask_ form: pass-through __O, mask __M. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
+{
+  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+              (__v8si) __O, __M);
+}
+/* maskz_ form: zero pass-through, mask __M. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
+{
+  return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
+              (__v8si)
+              _mm256_setzero_si256 (),
+              __M);
+}
+/* storeu form: hands __P (cast to __v8si *), __A and __M to __builtin_ia32_pmovqd512mem_mask. */
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
+{
+  __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
+}
+/* cvtepi64_epi16: __builtin_ia32_pmovqw512_mask on __A with an undefined __v8hi pass-through and an all-ones __mmask8; result returned as __m128i. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_cvtepi64_epi16 (__m512i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
+              (__v8hi)
+              _mm_undefined_si128 (),
+              (__mmask8) -1);
+}
+/* mask_ form: pass-through __O, mask __M. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
+              (__v8hi) __O, __M);
+}
+/* maskz_ form: zero pass-through, mask __M. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
+{
+  return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
+              (__v8hi)
+              _mm_setzero_si128 (),
+              __M);
+}
+/* storeu form: hands __P (cast to __v8hi *), __A and __M to __builtin_ia32_pmovqw512mem_mask. */
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
+{
+  __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
+}
+/* cvtsepi32_epi8: __builtin_ia32_pmovsdb512_mask on __A with an undefined __v16qi pass-through and an all-ones __mmask16 (presumably the signed-saturating counterpart of cvtepi32_epi8 - confirm against the Intel intrinsics reference). */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_cvtsepi32_epi8 (__m512i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+               (__v16qi)
+               _mm_undefined_si128 (),
+               (__mmask16) -1);
+}
+/* mask_ form: pass-through __O, mask __M. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+               (__v16qi) __O, __M);
+}
+/* maskz_ form: zero pass-through, mask __M. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
+{
+  return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
+               (__v16qi)
+               _mm_setzero_si128 (),
+               __M);
+}
+/* storeu form: hands __P (cast to __v16qi *), __A and __M to __builtin_ia32_pmovsdb512mem_mask. */
+static __inline__ void __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
+{
+  __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
+}
+/* cvtsepi32_epi16: __builtin_ia32_pmovsdw512_mask on __A with an undefined __v16hi pass-through and an all-ones __mmask16; result returned as __m256i. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_cvtsepi32_epi16 (__m512i __A)
+{
+  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
+               (__v16hi)
+               _mm256_undefined_si256 (),
+               (__mmask16) -1);
+}
+/* mask_ form: pass-through __O, mask __M. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
+{
+  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
+               (__v16hi) __O, __M);
+}
+/* maskz_ form: zero pass-through, mask __M. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
+{
+  return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
+               (__v16hi)
+               _mm256_setzero_si256 (),
+               __M);
+}
+
+static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) +{ + __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_cvtsepi64_epi8 (__m512i __A) +{ + return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, + (__v16qi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, + (__v16qi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) +{ + __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_cvtsepi64_epi32 (__m512i __A) +{ + __v8si __O; + return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, + (__v8si) __O, __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, + (__v8si) + _mm256_setzero_si256 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) +{ + __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_cvtsepi64_epi16 (__m512i __A) +{ + 
return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, + (__v8hi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) +{ + __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_cvtusepi32_epi8 (__m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, + (__v16qi) + _mm_undefined_si128 (), + (__mmask16) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, + (__v16qi) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) +{ + __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_cvtusepi32_epi16 (__m512i __A) +{ + return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, + (__v16hi) + _mm256_undefined_si256 (), + (__mmask16) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) +{ + return (__m256i) 
__builtin_ia32_pmovusdw512_mask ((__v16si) __A, + (__v16hi) __O, + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, + (__v16hi) + _mm256_setzero_si256 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) +{ + __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_cvtusepi64_epi8 (__m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, + (__v16qi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, + (__v16qi) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) +{ + __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_cvtusepi64_epi32 (__m512i __A) +{ + return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, + (__v8si) + _mm256_undefined_si256 (), + (__mmask8) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, + (__v8si) __O, __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) +{ + return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, + (__v8si) + _mm256_setzero_si256 
(), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) +{ + __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_cvtusepi64_epi16 (__m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, + (__v8hi) + _mm_undefined_si128 (), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, + (__v8hi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) +{ + return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) +{ + __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); +} + #undef __DEFAULT_FN_ATTRS #endif // __AVX512FINTRIN_H Index: lib/Headers/avx512vbmiintrin.h =================================================================== --- lib/Headers/avx512vbmiintrin.h +++ lib/Headers/avx512vbmiintrin.h @@ -79,6 +79,38 @@ __U); } + + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_permutexvar_epi8 (__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B, + (__v64qi) __A, + (__v64qi) + _mm512_setzero_si512 (), + (__mmask64) -1); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_permutexvar_epi8 (__mmask64 __M, __m512i __A, + __m512i __B) +{ + return (__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B, + (__v64qi) __A, + (__v64qi) + _mm512_setzero_si512(), + (__mmask64) __M); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_permutexvar_epi8 (__m512i __W, __mmask64 __M, __m512i __A, + __m512i __B) +{ + return 
(__m512i) __builtin_ia32_permvarqi512_mask ((__v64qi) __B, + (__v64qi) __A, + (__v64qi) __W, + (__mmask64) __M); +} #undef __DEFAULT_FN_ATTRS #endif Index: lib/Headers/avx512vbmivlintrin.h =================================================================== --- lib/Headers/avx512vbmivlintrin.h +++ lib/Headers/avx512vbmivlintrin.h @@ -29,7 +29,7 @@ #define __VBMIVLINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__(( __nodebug__, __target__("avx512vbmi,avx512vl"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"))) static __inline__ __m128i __DEFAULT_FN_ATTRS @@ -126,6 +126,66 @@ __U); } +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_permutexvar_epi8 (__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, + (__v16qi) __A, + (__v16qi) + _mm_undefined_si128 (), + (__mmask16) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, + (__v16qi) __A, + (__v16qi) + _mm_setzero_si128 (), + (__mmask16) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) __builtin_ia32_permvarqi128_mask ((__v16qi) __B, + (__v16qi) __A, + (__v16qi) __W, + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_permutexvar_epi8 (__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, + (__v32qi) __A, + (__v32qi) + _mm256_undefined_si256 (), + (__mmask32) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, + (__v32qi) __A, + (__v32qi) + _mm256_setzero_si256 (), + 
(__mmask32) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_permvarqi256_mask ((__v32qi) __B, + (__v32qi) __A, + (__v32qi) __W, + (__mmask32) __M); +} #undef __DEFAULT_FN_ATTRS Index: lib/Headers/avx512vlbwintrin.h =================================================================== --- lib/Headers/avx512vlbwintrin.h +++ lib/Headers/avx512vlbwintrin.h @@ -3020,6 +3020,257 @@ _mm256_setzero_si256 (), (__mmask32) __U); } + + +#define _mm_dbsad_epu8( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_dbpsadbw128_mask ((__v16qi)( __A),\ + (__v16qi)( __B),\ + ( __imm),\ + (__v8hi)\ + _mm_setzero_hi (),\ + (__mmask8) -1);\ +}) + +#define _mm_mask_dbsad_epu8( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_dbpsadbw128_mask ((__v16qi)( __A),\ + (__v16qi)( __B),\ + ( __imm),\ + (__v8hi)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm_maskz_dbsad_epu8( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_dbpsadbw128_mask ((__v16qi)( __A),\ + (__v16qi)( __B),\ + ( __imm),\ + (__v8hi)\ + _mm_setzero_si128 (),\ + (__mmask8)( __U));\ +}) + +#define _mm256_dbsad_epu8( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_dbpsadbw256_mask ((__v32qi)( __A),\ + (__v32qi)( __B),\ + ( __imm),\ + (__v16hi)\ + _mm256_setzero_si256 (),\ + (__mmask16) -1);\ +}) + +#define _mm256_mask_dbsad_epu8( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_dbpsadbw256_mask ((__v32qi)( __A),\ + (__v32qi)( __B),\ + ( __imm),\ + (__v16hi)( __W),\ + (__mmask16)( __U));\ +}) + +#define _mm256_maskz_dbsad_epu8( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_dbpsadbw256_mask ((__v32qi)( __A),\ + (__v32qi)( __B),\ + ( __imm),\ + (__v16hi)\ + _mm256_setzero_si256 (),\ + (__mmask16)( __U));\ +}) + +#define _mm_mask_alignr_epi8( __W, __U, __A, __B, __N) __extension__ ({ \ +__builtin_ia32_palignr128_mask ((__v2di)( __A),\ + (__v2di)( __B),\ + 
( __N) * 8,\ + (__v2di)( __W),\ + (__mmask16)( __U));\ +}) + +#define _mm_maskz_alignr_epi8( __U, __A, __B, __N) __extension__ ({ \ +__builtin_ia32_palignr128_mask ((__v2di)( __A),\ + (__v2di)( __B),\ + ( __N) * 8,\ + (__v2di)\ + _mm_setzero_si128 (),\ + (__mmask16)( __U));\ +}) + +#define _mm256_mask_alignr_epi8( __W, __U, __A, __B, __N) __extension__ ({ \ +__builtin_ia32_palignr256_mask ((__v4di)( __A),\ + (__v4di)( __B),\ + ( __N) * 8,\ + (__v4di)( __W),\ + (__mmask32)( __U));\ +}) + +#define _mm256_maskz_alignr_epi8( __U, __A, __B, __N) __extension__ ({ \ +__builtin_ia32_palignr256_mask ((__v4di)( __A),\ + (__v4di)( __B),\ + ( __N) * 8,\ + (__v4di)\ + _mm256_setzero_si256 (),\ + (__mmask32)( __U));\ +}) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A, + (__v16qi) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pbroadcastb128_mask ((__v16qi) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A) +{ + return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A, + (__v32qi) __O, + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A) +{ + return (__m256i) __builtin_ia32_pbroadcastb256_mask ((__v16qi) __A, + (__v32qi) + _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A) +{ + return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A, + (__v8hi) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_set1_epi16 (__mmask8 __M, short __A) +{ + return (__m128i) __builtin_ia32_pbroadcastw128_gpr_mask (__A, + (__v8hi) + 
_mm_setzero_si128 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A, + (__v8hi) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pbroadcastw128_mask ((__v8hi) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A) +{ + return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A, + (__v16hi) __O, + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_set1_epi16 (__mmask16 __M, short __A) +{ + return (__m256i) __builtin_ia32_pbroadcastw256_gpr_mask (__A, + (__v16hi) + _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A) +{ + return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A, + (__v16hi) __O, + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A) +{ + return (__m256i) __builtin_ia32_pbroadcastw256_mask ((__v8hi) __A, + (__v16hi) + _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_permutexvar_epi16 (__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B, + (__v8hi) __A, + (__v8hi) + _mm_setzero_hi (), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_permvarhi128_mask ((__v8hi) __B, + (__v8hi) __A, + (__v8hi) + _mm_setzero_si128 (), + (__mmask8) __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A, + __m128i __B) +{ + return (__m128i) 
__builtin_ia32_permvarhi128_mask ((__v8hi) __B, + (__v8hi) __A, + (__v8hi) __W, + (__mmask8) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_permutexvar_epi16 (__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B, + (__v16hi) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) -1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B, + (__v16hi) __A, + (__v16hi) + _mm256_setzero_si256 (), + (__mmask16) __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, + __m256i __B) +{ + return (__m256i) __builtin_ia32_permvarhi256_mask ((__v16hi) __B, + (__v16hi) __A, + (__v16hi) __W, + (__mmask16) __M); +} + #undef __DEFAULT_FN_ATTRS #endif /* __AVX512VLBWINTRIN_H */ Index: lib/Headers/avx512vldqintrin.h =================================================================== --- lib/Headers/avx512vldqintrin.h +++ lib/Headers/avx512vldqintrin.h @@ -948,6 +948,304 @@ (__m256) __builtin_ia32_reduceps256_mask ((__v8sf) __A, __B, \ (__v8sf) _mm256_setzero_ps(), (__mmask8) __U); }) + + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_broadcast_f32x2 (__m128 __A) +{ + return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, + (__v8sf)_mm256_undefined_ps(), + (__mmask8) - + 1); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) +{ + return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, + (__v8sf) __O, + __M); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) +{ + return (__m256) __builtin_ia32_broadcastf32x2_256_mask ((__v4sf) __A, + (__v8sf) + _mm256_setzero_ps (), + __M); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_broadcast_f64x2 
(__m128d __A)
+{
+  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
+               (__v4df)_mm256_undefined_pd(),
+               (__mmask8) -1);
+}
+
+/* Mask form: caller-supplied __O as the second operand, __M as the mask. */
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_broadcast_f64x2 (__m256d __O, __mmask8 __M, __m128d __A)
+{
+  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
+               (__v4df) __O,
+               __M);
+}
+
+/* Maskz form: zero vector as the second operand, __M as the mask.
+   The zero vector is built with the double-precision constructor so the
+   (__v4df) cast matches the operand's element type. */
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
+{
+  return (__m256d) __builtin_ia32_broadcastf64x2_256_mask ((__v2df) __A,
+               (__v4df) _mm256_setzero_pd (),
+               __M);
+}
+
+/* Unmasked form: all-ones mask, undefined vector as the second operand. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_broadcast_i32x2 (__m128i __A)
+{
+  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
+               (__v4si)_mm_undefined_si128(),
+               (__mmask8) -1);
+}
+
+/* Mask form: caller-supplied __O as the second operand, __M as the mask. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
+               (__v4si) __O,
+               __M);
+}
+
+/* Maskz form: zero vector as the second operand, __M as the mask. */
+static __inline__ __m128i __DEFAULT_FN_ATTRS
+_mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
+{
+  return (__m128i) __builtin_ia32_broadcasti32x2_128_mask ((__v4si) __A,
+               (__v4si) _mm_setzero_si128 (),
+               __M);
+}
+
+/* Unmasked form: all-ones mask, undefined vector as the second operand. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_broadcast_i32x2 (__m128i __A)
+{
+  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
+               (__v8si)_mm256_undefined_si256(),
+               (__mmask8) -1);
+}
+
+/* Mask form: caller-supplied __O as the second operand, __M as the mask. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
+               (__v8si) __O,
+               __M);
+}
+
+/* Maskz form: zero vector as the second operand, __M as the mask. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_broadcasti32x2_256_mask ((__v4si) __A,
+               (__v8si) _mm256_setzero_si256 (),
+               __M);
+}
+
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_broadcast_i64x2 (__m128i __A) +{ + return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) + __A, + (__v4di)_mm256_undefined_si256(), + (__mmask8) - + 1); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_broadcast_i64x2 (__m256i __O, __mmask8 __M, __m128i __A) +{ + return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) + __A, + (__v4di) + __O, __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) +{ + return (__m256i) __builtin_ia32_broadcasti64x2_256_mask ((__v2di) + __A, + (__v4di) + _mm256_setzero_si256 (), + __M); +} + +#define _mm256_extractf64x2_pd( __A, __imm) __extension__ ({ \ +__builtin_ia32_extractf64x2_256_mask ((__v4df)( __A),\ + ( __imm),\ + (__v2df)\ + _mm_setzero_pd (),\ + (__mmask8) -\ + 1);\ +}) + +#define _mm256_mask_extractf64x2_pd( __W, __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extractf64x2_256_mask ((__v4df)( __A),\ + ( __imm),\ + (__v2df)( __W),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm256_maskz_extractf64x2_pd( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extractf64x2_256_mask ((__v4df)( __A),\ + ( __imm),\ + (__v2df)\ + _mm_setzero_pd (),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm256_extracti64x2_epi64( __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti64x2_256_mask ((__v4di)( __A),\ + ( __imm),\ + (__v2di)\ + _mm_setzero_di (),\ + (__mmask8) -\ + 1);\ +}) + +#define _mm256_mask_extracti64x2_epi64( __W, __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti64x2_256_mask ((__v4di)( __A),\ + ( __imm),\ + (__v2di)( __W),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm256_maskz_extracti64x2_epi64( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_extracti64x2_256_mask ((__v4di)( __A),\ + ( __imm),\ + (__v2di)\ + _mm_setzero_di (),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm_mask_fpclass_pd_mask( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_fpclasspd128_mask ((__v2df)( __A),\ + ( __imm),( __U));\ 
+}) + +#define _mm_fpclass_pd_mask( __A, __imm) __extension__ ({ \ +__builtin_ia32_fpclasspd128_mask ((__v2df)( __A),\ + ( __imm),\ + (__mmask8) -1);\ +}) + +#define _mm256_mask_fpclass_pd_mask( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_fpclasspd256_mask ((__v4df)( __A),\ + ( __imm),( __U));\ +}) + +#define _mm256_fpclass_pd_mask( __A, __imm) __extension__ ({ \ +__builtin_ia32_fpclasspd256_mask ((__v4df)( __A),\ + ( __imm),\ + (__mmask8) -1);\ +}) + +#define _mm_mask_fpclass_ps_mask( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_fpclassps128_mask ((__v4sf)( __A),\ + ( __imm),( __U));\ +}) + +#define _mm_fpclass_ps_mask( __A, __imm) __extension__ ({ \ +__builtin_ia32_fpclassps128_mask ((__v4sf)( __A),\ + ( __imm),\ + (__mmask8) -1);\ +}) + +#define _mm256_mask_fpclass_ps_mask( __U, __A, __imm) __extension__ ({ \ +__builtin_ia32_fpclassps256_mask ((__v8sf)( __A),\ + ( __imm),( __U));\ +}) + +#define _mm256_fpclass_ps_mask( __A, __imm) __extension__ ({ \ +__builtin_ia32_fpclassps256_mask ((__v8sf)( __A),\ + ( __imm),\ + (__mmask8) -1);\ +}) + +#define _mm256_insertf64x2( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_insertf64x2_256_mask ((__v4df)( __A),\ + (__v2df)( __B),\ + ( __imm),\ + (__v4df)\ + _mm256_setzero_pd (),\ + (__mmask8) -\ + 1);\ +}) + +#define _mm256_mask_insertf64x2( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_insertf64x2_256_mask ((__v4df)( __A),\ + (__v2df)( __B),\ + ( __imm),\ + (__v4df)( __W),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm256_maskz_insertf64x2( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_insertf64x2_256_mask ((__v4df)( __A),\ + (__v2df)( __B),\ + ( __imm),\ + (__v4df)\ + _mm256_setzero_pd (),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm256_inserti64x2( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_inserti64x2_256_mask ((__v4di)( __A),\ + (__v2di)( __B),\ + ( __imm),\ + (__v4di)\ + _mm256_setzero_si256 (),\ + (__mmask8) -\ + 1);\ +}) + +#define _mm256_mask_inserti64x2( __W, 
__U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_inserti64x2_256_mask ((__v4di)( __A),\ + (__v2di)( __B),\ + ( __imm),\ + (__v4di)( __W),\ + (__mmask8)\ + ( __U));\ +}) + +#define _mm256_maskz_inserti64x2( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_inserti64x2_256_mask ((__v4di)( __A),\ + (__v2di)( __B),\ + ( __imm),\ + (__v4di)\ + _mm256_setzero_si256 (),\ + (__mmask8)\ + ( __U));\ +}) + #undef __DEFAULT_FN_ATTRS #endif Index: lib/Headers/avx512vlintrin.h =================================================================== --- lib/Headers/avx512vlintrin.h +++ lib/Headers/avx512vlintrin.h @@ -6012,31 +6012,6 @@ (__mmask8) __U); } - -#define _mm_mask_set1_epi32( __O, __M, __A) __extension__ ({ \ -__builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si)( __O),\ - ( __M));\ -}) - -#define _mm_maskz_set1_epi32( __M, __A) __extension__ ({ \ -__builtin_ia32_pbroadcastd128_gpr_mask (__A,\ - (__v4si)\ - _mm_setzero_si128 (),\ - ( __M));\ -}) - -#define _mm256_mask_set1_epi32( __O, __M, __A) __extension__ ({ \ -__builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si)( __O),\ - ( __M));\ -}) - -#define _mm256_maskz_set1_epi32( __M, __A) __extension__ ({ \ -__builtin_ia32_pbroadcastd256_gpr_mask (__A,\ - (__v8si)\ - _mm256_setzero_si256 (),\ - ( __M));\ -}) - static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) { @@ -6368,6 +6343,1629 @@ _mm256_setzero_ps (), (__mmask8) __U); } + + +#define _mm_alignr_epi32( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_alignd128_mask ((__v4si)( __A),\ + (__v4si)( __B),( __imm),\ + (__v4si)\ + _mm_setzero_si128 (),\ + (__mmask8) -1);\ +}) + +#define _mm_mask_alignr_epi32( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_alignd128_mask ((__v4si)( __A),\ + (__v4si)( __B),( __imm),\ + (__v4si)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm_maskz_alignr_epi32( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_alignd128_mask ((__v4si)( __A),\ + 
(__v4si)( __B),( __imm),\ + (__v4si)\ + _mm_setzero_si128 (),\ + (__mmask8)( __U));\ +}) + +#define _mm256_alignr_epi32( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_alignd256_mask ((__v8si)( __A),\ + (__v8si)( __B),( __imm),\ + (__v8si)\ + _mm256_setzero_si256 (),\ + (__mmask8) -1);\ +}) + +#define _mm256_mask_alignr_epi32( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_alignd256_mask ((__v8si)( __A),\ + (__v8si)( __B),( __imm),\ + (__v8si)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm256_maskz_alignr_epi32( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_alignd256_mask ((__v8si)( __A),\ + (__v8si)( __B),( __imm),\ + (__v8si)\ + _mm256_setzero_si256 (),\ + (__mmask8)( __U));\ +}) + +#define _mm_alignr_epi64( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_alignq128_mask ((__v2di)( __A),\ + (__v2di)( __B),( __imm),\ + (__v2di)\ + _mm_setzero_di (),\ + (__mmask8) -1);\ +}) + +#define _mm_mask_alignr_epi64( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_alignq128_mask ((__v2di)( __A),\ + (__v2di)( __B),( __imm),\ + (__v2di)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm_maskz_alignr_epi64( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_alignq128_mask ((__v2di)( __A),\ + (__v2di)( __B),( __imm),\ + (__v2di)\ + _mm_setzero_di (),\ + (__mmask8)( __U));\ +}) + +#define _mm256_alignr_epi64( __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_alignq256_mask ((__v4di)( __A),\ + (__v4di)( __B),( __imm),\ + (__v4di)\ + _mm256_setzero_si256 (),\ + (__mmask8) -1);\ +}) + +#define _mm256_mask_alignr_epi64( __W, __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_alignq256_mask ((__v4di)( __A),\ + (__v4di)( __B),( __imm),\ + (__v4di)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm256_maskz_alignr_epi64( __U, __A, __B, __imm) __extension__ ({ \ +__builtin_ia32_alignq256_mask ((__v4di)( __A),\ + (__v4di)( __B),( __imm),\ + (__v4di)\ + _mm256_setzero_si256 (),\ + (__mmask8)( __U));\ +}) + +static __inline__ 
__m256 __DEFAULT_FN_ATTRS
+_mm256_broadcast_f32x4 (__m128 __A)
+{
+  /* Unmasked form: all-ones mask, undefined vector as the second operand.
+     The undefined vector comes from the single-precision constructor so
+     the (__v8sf) cast matches the operand's element type. */
+  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
+               (__v8sf)_mm256_undefined_ps (),
+               (__mmask8) -1);
+}
+
+/* Mask form: caller-supplied __O as the second operand, __M as the mask. */
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_broadcast_f32x4 (__m256 __O, __mmask8 __M, __m128 __A)
+{
+  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
+               (__v8sf) __O,
+               __M);
+}
+
+/* Maskz form: zero vector as the second operand, __M as the mask. */
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
+{
+  return (__m256) __builtin_ia32_broadcastf32x4_256_mask ((__v4sf) __A,
+               (__v8sf) _mm256_setzero_ps (),
+               __M);
+}
+
+/* Unmasked form: all-ones mask, undefined vector as the second operand. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_broadcast_i32x4 (__m128i __A)
+{
+  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
+               (__v8si)_mm256_undefined_si256 (),
+               (__mmask8) -1);
+}
+
+/* Mask form: caller-supplied __O as the second operand, __M as the mask. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_mask_broadcast_i32x4 (__m256i __O, __mmask8 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
+               (__v8si) __O,
+               __M);
+}
+
+/* Maskz form: zero vector as the second operand, __M as the mask. */
+static __inline__ __m256i __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcast_i32x4 (__mmask8 __M, __m128i __A)
+{
+  return (__m256i) __builtin_ia32_broadcasti32x4_256_mask ((__v4si) __A,
+               (__v8si) _mm256_setzero_si256 (),
+               __M);
+}
+
+/* Mask form: caller-supplied __O as the second operand, __M as the mask. */
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
+{
+  return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
+               (__v4df) __O,
+               __M);
+}
+
+/* Maskz form: zero vector as the second operand, __M as the mask. */
+static __inline__ __m256d __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
+{
+  return (__m256d) __builtin_ia32_broadcastsd256_mask ((__v2df) __A,
+               (__v4df) _mm256_setzero_pd (),
+               __M);
+}
+
+/* Mask form: caller-supplied __O as the second operand, __M as the mask. */
+static __inline__ __m128i_placeholder_do_not_use
__DEFAULT_FN_ATTRS
+_mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
+{
+  return (__m128) __builtin_ia32_broadcastss128_mask ((__v4sf) __A,
+               (__v4sf) _mm_setzero_ps (),
+               __M);
+}
+
+/* Mask form: caller-supplied __O as the second operand, __M as the mask. */
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
+{
+  return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
+               (__v8sf) __O,
+               __M);
+}
+
+/* Maskz form: zero vector as the second operand, __M as the mask. */
+static __inline__ __m256 __DEFAULT_FN_ATTRS
+_mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
+{
+  return (__m256) __builtin_ia32_broadcastss256_mask ((__v4sf) __A,
+               (__v8sf) _mm256_setzero_ps (),
+               __M);
+}
+
+/* extractf32x4 / extracti32x4: macros rather than functions, so that __imm
+   reaches the builtin as written at the call site.  Plain form passes an
+   all-ones mask with a zero vector, mask form passes __W and __U, maskz
+   form passes a zero vector and __U. */
+#define _mm256_extractf32x4_ps( __A, __imm) __extension__ ({ \
+__builtin_ia32_extractf32x4_256_mask ((__v8sf)( __A),\
+               ( __imm),\
+               (__v4sf)\
+               _mm_setzero_ps (),\
+               (__mmask8) -1);\
+})
+
+#define _mm256_mask_extractf32x4_ps( __W, __U, __A, __imm) __extension__ ({ \
+__builtin_ia32_extractf32x4_256_mask ((__v8sf)( __A),\
+               ( __imm),\
+               (__v4sf)( __W),\
+               (__mmask8)( __U));\
+})
+
+#define _mm256_maskz_extractf32x4_ps( __U, __A, __imm) __extension__ ({ \
+__builtin_ia32_extractf32x4_256_mask ((__v8sf)( __A),\
+               ( __imm),\
+               (__v4sf)\
+               _mm_setzero_ps (),\
+               (__mmask8)( __U));\
+})
+
+#define _mm256_extracti32x4_epi32( __A, __imm) __extension__ ({ \
+__builtin_ia32_extracti32x4_256_mask ((__v8si)( __A),\
+               ( __imm),\
+               (__v4si)\
+               _mm_setzero_si128 (),\
+               (__mmask8) -1);\
+})
+
+#define _mm256_mask_extracti32x4_epi32( __W, __U, __A, __imm) __extension__ ({ \
+__builtin_ia32_extracti32x4_256_mask ((__v8si)( __A),\
+               ( __imm),\
+               (__v4si)( __W),\
+               (__mmask8)( __U));\
+})
+
+#define _mm256_maskz_extracti32x4_epi32( __U, __A, __imm) __extension__ ({ \
+__builtin_ia32_extracti32x4_256_mask ((__v8si)( __A),\
+               ( __imm),\
+               (__v4si)\
+               _mm_setzero_si128 (),\
+               (__mmask8)( __U));\
+})
+
+/* getmant family.  The builtin's immediate is assembled as
+   ((__C) << 2) | (__B).  Every macro parameter is parenthesized before it
+   is cast, shifted or OR'ed, so that an argument which is itself an
+   expression (e.g. `a | b`) keeps its own grouping.  Plain form passes an
+   all-ones mask with a zero vector, mask form passes __W and __U, maskz
+   form passes a zero vector and __U. */
+#define _mm_getmant_pd( __A, __B, __C) __extension__ ({ \
+__builtin_ia32_getmantpd128_mask ((__v2df)( __A),\
+               ((__C) << 2) | ( __B),\
+               (__v2df)\
+               _mm_setzero_pd (),\
+               (__mmask8) -1);\
+})
+
+#define _mm_mask_getmant_pd( __W, __U, __A, __B, __C) __extension__ ({ \
+__builtin_ia32_getmantpd128_mask ((__v2df)( __A),\
+               ((__C) << 2) | ( __B),\
+               (__v2df)( __W),\
+               (__mmask8)( __U));\
+})
+
+#define _mm_maskz_getmant_pd( __U, __A, __B, __C) __extension__ ({ \
+__builtin_ia32_getmantpd128_mask ((__v2df)( __A),\
+               ((__C) << 2) | ( __B),\
+               (__v2df)\
+               _mm_setzero_pd (),\
+               (__mmask8)( __U));\
+})
+
+#define _mm256_getmant_pd( __A, __B, __C) __extension__ ({ \
+__builtin_ia32_getmantpd256_mask ((__v4df)( __A),\
+               ((__C) << 2) | ( __B),\
+               (__v4df)\
+               _mm256_setzero_pd (),\
+               (__mmask8) -1);\
+})
+
+#define _mm256_mask_getmant_pd( __W, __U, __A, __B, __C) __extension__ ({ \
+__builtin_ia32_getmantpd256_mask ((__v4df)( __A),\
+               ((__C) << 2) | ( __B),\
+               (__v4df)( __W),\
+               (__mmask8)( __U));\
+})
+
+#define _mm256_maskz_getmant_pd( __U, __A, __B, __C) __extension__ ({ \
+__builtin_ia32_getmantpd256_mask ((__v4df)( __A),\
+               ((__C) << 2) | ( __B),\
+               (__v4df)\
+               _mm256_setzero_pd (),\
+               (__mmask8)( __U));\
+})
+
+#define _mm_getmant_ps( __A, __B, __C) __extension__ ({ \
+__builtin_ia32_getmantps128_mask ((__v4sf)( __A),\
+               ((__C) << 2) | ( __B),\
+               (__v4sf)\
+               _mm_setzero_ps (),\
+               (__mmask8) -1);\
+})
+
+#define _mm_mask_getmant_ps( __W, __U, __A, __B, __C) __extension__ ({ \
+__builtin_ia32_getmantps128_mask ((__v4sf)( __A),\
+               ((__C) << 2) | ( __B),\
+               (__v4sf)( __W),\
+               (__mmask8)( __U));\
+})
+
+#define _mm_maskz_getmant_ps( __U, __A, __B, __C) __extension__ ({ \
+__builtin_ia32_getmantps128_mask ((__v4sf)( __A),\
+               ((__C) << 2) | ( __B),\
+               (__v4sf)\
+               _mm_setzero_ps (),\
+               (__mmask8)( __U));\
+})
+
+#define _mm256_getmant_ps( __A, __B, __C) __extension__ ({ \
+__builtin_ia32_getmantps256_mask ((__v8sf)( __A),\
+               ((__C) << 2) | ( __B),\
+               (__v8sf)\
+               _mm256_setzero_ps (),\
+               (__mmask8) -1);\
+})
+
+#define _mm256_mask_getmant_ps( __W, __U, __A, __B, __C) __extension__ ({ \
+__builtin_ia32_getmantps256_mask ((__v8sf)( __A),\
+               ((__C) << 2) | ( __B),\
+               (__v8sf)( __W),\
+               (__mmask8)( __U));\
+})
+
+#define _mm256_maskz_getmant_ps( __U, __A, __B, __C) __extension__ ({ \
+__builtin_ia32_getmantps256_mask ((__v8sf)( __A),\
+               ((__C) << 2) | ( __B),\
+               (__v8sf)\
+               _mm256_setzero_ps (),\
+               (__mmask8)( __U));\
+})
+
+/* insertf32x4 / inserti32x4: same plain / mask / maskz triple as above,
+   with __imm passed through to the builtin unchanged. */
+#define _mm256_insertf32x4( __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_insertf32x4_256_mask ((__v8sf)( __A),\
+               (__v4sf)( __B),\
+               ( __imm),\
+               (__v8sf)\
+               _mm256_setzero_ps (),\
+               (__mmask8) -1);\
+})
+
+#define _mm256_mask_insertf32x4( __W, __U, __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_insertf32x4_256_mask ((__v8sf)( __A),\
+               (__v4sf)( __B),\
+               ( __imm),\
+               (__v8sf)( __W),\
+               (__mmask8)( __U));\
+})
+
+#define _mm256_maskz_insertf32x4( __U, __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_insertf32x4_256_mask ((__v8sf)( __A),\
+               (__v4sf)( __B),\
+               ( __imm),\
+               (__v8sf)\
+               _mm256_setzero_ps (),\
+               (__mmask8)( __U));\
+})
+
+#define _mm256_inserti32x4( __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_inserti32x4_256_mask ((__v8si)( __A),\
+               (__v4si)( __B),\
+               ( __imm),\
+               (__v8si)\
+               _mm256_setzero_si256 (),\
+               (__mmask8) -1);\
+})
+
+#define _mm256_mask_inserti32x4( __W, __U, __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_inserti32x4_256_mask ((__v8si)( __A),\
+               (__v4si)( __B),\
+               ( __imm),\
+               (__v8si)( __W),\
+               (__mmask8)( __U));\
+})
+
+#define _mm256_maskz_inserti32x4( __U, __A, __B, __imm) __extension__ ({ \
+__builtin_ia32_inserti32x4_256_mask ((__v8si)( __A),\
+               (__v4si)( __B),\
+               ( __imm),\
+               (__v8si)\
+               _mm256_setzero_si256 (),\
+               (__mmask8)( __U));\
+})
+
+/* Mask form: caller-supplied __W as the second operand, __U as the mask. */
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
+{
+  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
+               (__v4sf) __W,
+               (__mmask8) __U);
+}
+
+static __inline__ __m128 __DEFAULT_FN_ATTRS
+_mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
+{
+  return (__m128) __builtin_ia32_movshdup128_mask ((__v4sf) __A,
+               (__v4sf)
+               _mm_setzero_ps (),
+               (__mmask8)
__U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) +{ + return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A, + (__v8sf) __W, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) +{ + return (__m256) __builtin_ia32_movshdup256_mask ((__v8sf) __A, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) +{ + return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A, + (__v4sf) __W, + (__mmask8) __U); +} + +static __inline__ __m128 __DEFAULT_FN_ATTRS +_mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) +{ + return (__m128) __builtin_ia32_movsldup128_mask ((__v4sf) __A, + (__v4sf) + _mm_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) +{ + return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A, + (__v8sf) __W, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) +{ + return (__m256) __builtin_ia32_movsldup256_mask ((__v8sf) __A, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_set1_epi32 (__m128i __O, __mmask8 __M, int __A) +{ + return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, (__v4si) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_set1_epi32 (__mmask8 __M, int __A) +{ + return (__m128i) __builtin_ia32_pbroadcastd128_gpr_mask (__A, + (__v4si) + _mm_setzero_si128 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A, + (__v4si) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS 
+_mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pbroadcastd128_mask ((__v4si) __A, + (__v4si) + _mm_setzero_si128 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_set1_epi32 (__m256i __O, __mmask8 __M, int __A) +{ + return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, (__v8si) __O, + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_set1_epi32 (__mmask8 __M, int __A) +{ + return (__m256i) __builtin_ia32_pbroadcastd256_gpr_mask (__A, + (__v8si) + _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) +{ + return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A, + (__v8si) __O, + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) +{ + return (__m256i) __builtin_ia32_pbroadcastd256_mask ((__v4si) __A, + (__v8si) + _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A, + (__v2di) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pbroadcastq128_mask ((__v2di) __A, + (__v2di) + _mm_setzero_si128 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) +{ + return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A, + (__v4di) __O, + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) +{ + return (__m256i) __builtin_ia32_pbroadcastq256_mask ((__v2di) __A, + (__v4di) + _mm256_setzero_si256 (), + __M); +} + +#define _mm256_mask_permutex_pd( __W, __U, __X, __imm) __extension__ ({ \ 
+__builtin_ia32_permdf256_mask ((__v4df)( __X),( __imm),\ + (__v4df)( __W),\ + (__mmask8)( __U));\ +}) + +#define _mm256_maskz_permutex_pd( __U, __X, __imm) __extension__ ({ \ +__builtin_ia32_permdf256_mask ((__v4df)( __X),( __imm),\ + (__v4df)\ + _mm256_setzero_pd (),\ + (__mmask8)( __U));\ +}) + +#define _mm256_permutex_pd( __X, __M) __extension__ ({ \ +__builtin_ia32_permdf256_mask ((__v4df)( __X),( __M),\ + (__v4df)\ + _mm256_undefined_pd (),\ + (__mmask8) -1);\ +}) + +#define _mm256_mask_permutex_epi64( __W, __M, __X, __I) __extension__ ({ \ +__builtin_ia32_permdi256_mask ((__v4di)( __X),\ + ( __I),\ + (__v4di)( __W),\ + (__mmask8)( __M));\ +}) + +#define _mm256_maskz_permutex_epi64( __M, __X, __I) __extension__ ({ \ +__builtin_ia32_permdi256_mask ((__v4di)( __X),\ + ( __I),\ + (__v4di)\ + _mm256_setzero_si256 (),\ + (__mmask8)( __M));\ +}) + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_permutexvar_pd (__m256i __X, __m256d __Y) +{ + return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, + (__v4di) __X, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) -1); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, + __m256d __Y) +{ + return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, + (__v4di) __X, + (__v4df) __W, + (__mmask8) __U); +} + +static __inline__ __m256d __DEFAULT_FN_ATTRS +_mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) +{ + return (__m256d) __builtin_ia32_permvardf256_mask ((__v4df) __Y, + (__v4di) __X, + (__v4df) + _mm256_setzero_pd (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) +{ + return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, + (__v4di) __X, + (__v4di) + _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, + 
__m256i __Y) +{ + return (__m256i) __builtin_ia32_permvardi256_mask ((__v4di) __Y, + (__v4di) __X, + (__v4di) __W, + __M); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_mask_permutexvar_ps (__m256 __W, __mmask8 __U, __m256i __X, + __m256 __Y) +{ + return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, + (__v8si) __X, + (__v8sf) __W, + (__mmask8) __U); +} + +static __inline__ __m256 __DEFAULT_FN_ATTRS +_mm256_maskz_permutexvar_ps (__mmask8 __U, __m256i __X, __m256 __Y) +{ + return (__m256) __builtin_ia32_permvarsf256_mask ((__v8sf) __Y, + (__v8si) __X, + (__v8sf) + _mm256_setzero_ps (), + (__mmask8) __U); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_permutexvar_epi32 (__mmask8 __M, __m256i __X, __m256i __Y) +{ + return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, + (__v8si) __X, + (__v8si) + _mm256_setzero_si256 (), + __M); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_permutexvar_epi32 (__m256i __W, __mmask8 __M, __m256i __X, + __m256i __Y) +{ + return (__m256i) __builtin_ia32_permvarsi256_mask ((__v8si) __Y, + (__v8si) __X, + (__v8si) __W, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi32_epi8 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, + (__v16qi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, + (__v16qi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); +} + +static __inline__ __m128i 
__DEFAULT_FN_ATTRS +_mm256_cvtepi32_epi8 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, + (__v16qi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, + (__v16qi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi32_epi16 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, + (__v8hi) _mm_setzero_si128 (), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, + (__v8hi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_cvtepi32_epi16 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, + (__v8hi)_mm_setzero_si128 (), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovdw256_mask 
((__v8si) __A, + (__v8hi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi64_epi8 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, + (__v16qi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, + (__v16qi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_cvtepi64_epi8 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, + (__v16qi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, + (__v16qi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtepi64_storeu_epi8 (void * __P, 
__mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi64_epi32 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, + (__v4si)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, + (__v4si) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, + (__v4si) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_cvtepi64_epi32 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, + (__v4si)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, + (__v4si) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovqd256_mask ((__v4di) __A, + (__v4si) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtepi64_epi16 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, + (__v8hi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i 
__DEFAULT_FN_ATTRS +_mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, + (__v8hi)__O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_cvtepi64_epi16 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, + (__v8hi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, + (__v8hi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtsepi32_epi8 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, + (__v16qi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, + (__v16qi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) 
__A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_cvtsepi32_epi8 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, + (__v16qi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, + (__v16qi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtsepi32_epi16 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, + (__v8hi)_mm_setzero_si128 (), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, + (__v8hi)__O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_cvtsepi32_epi16 (__m256i 
__A) +{ + return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, + (__v8hi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, + (__v8hi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtsepi64_epi8 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, + (__v16qi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, + (__v16qi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_cvtsepi64_epi8 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, + (__v16qi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, + (__v16qi) __O, 
__M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtsepi64_epi32 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, + (__v4si)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, + (__v4si) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, + (__v4si) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_cvtsepi64_epi32 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, + (__v4si)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, + (__v4si)__O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, + (__v4si) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 
__M, __m256i __A) +{ + __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtsepi64_epi16 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, + (__v8hi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, + (__v8hi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_cvtsepi64_epi16 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, + (__v8hi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, + (__v8hi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtusepi32_epi8 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, + (__v16qi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i 
__DEFAULT_FN_ATTRS +_mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, + (__v16qi) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_cvtusepi32_epi8 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, + (__v16qi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, + (__v16qi) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtusepi32_epi16 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, + (__v8hi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, + (__v8hi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) +{ + return (__m128i) 
__builtin_ia32_pmovusdw128_mask ((__v4si) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_cvtusepi32_epi16 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, + (__v8hi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, + (__v8hi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtusepi64_epi8 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, + (__v16qi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, + (__v16qi) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); +} + +static __inline__ 
__m128i __DEFAULT_FN_ATTRS +_mm256_cvtusepi64_epi8 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, + (__v16qi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, + (__v16qi) __O, + __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, + (__v16qi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtusepi64_epi32 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, + (__v4si)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, + (__v4si) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, + (__v4si) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_cvtusepi64_epi32 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, + (__v4si)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) +{ + 
return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, + (__v4si) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, + (__v4si) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) +{ + __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_cvtusepi64_epi16 (__m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, + (__v8hi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, + (__v8hi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) +{ + return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + +static __inline__ void __DEFAULT_FN_ATTRS +_mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) +{ + __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_cvtusepi64_epi16 (__m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, + (__v8hi)_mm_undefined_si128(), + (__mmask8) -1); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, + (__v8hi) __O, __M); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) +{ + return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, + (__v8hi) + _mm_setzero_si128 (), + __M); +} + 
+static __inline__ void __DEFAULT_FN_ATTRS +_mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) +{ /* passes __P (as __v8hi *), __A (as __v4di) and mask __M to the pmovusqw256mem builtin; nothing is returned */ + __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); /* plain call statement: 'return <void expression>;' in a void function is not valid ISO C */ +} + +#define _mm_mask_shuffle_epi32( __W, __U, __A, __mask) __extension__({ /* pshufd128 builtin with __W as third operand and __U as mask */ \ + __builtin_ia32_pshufd128_mask ((__v4si) (__A), (__mask),\ + (__v4si) (__W), (__mmask8) (__U)); /* __U parenthesized so the cast covers the whole argument expression */ \ +}) + +#define _mm_maskz_shuffle_epi32( __U, __A, __mask) __extension__({ /* pshufd128 builtin with a zero vector as third operand and __U as mask */ \ + __builtin_ia32_pshufd128_mask ((__v4si) (__A), (__mask),\ + (__v4si)\ + _mm_setzero_si128 (),\ + (__mmask8) (__U));\ +}) + +#define _mm256_mask_shuffle_epi32( __W, __U, __A, __mask) __extension__({ /* pshufd256 builtin with __W as third operand and __U as mask */ \ +__builtin_ia32_pshufd256_mask((__v8si) (__A), (__mask),\ + (__v8si) (__W), (__mmask8) (__U)); /* __U parenthesized so the cast covers the whole argument expression */ \ +}) + +#define _mm256_maskz_shuffle_epi32( __U, __A, __mask) __extension__({ /* pshufd256 builtin with a zero vector as third operand and __U as mask */ \ +__builtin_ia32_pshufd256_mask((__v8si) (__A), (__mask),\ + (__v8si) _mm256_setzero_si256 (),\ + (__mmask8) (__U));\ +}) + + #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_BOTH Index: test/CodeGen/avx512bw-builtins.c =================================================================== --- test/CodeGen/avx512bw-builtins.c +++ test/CodeGen/avx512bw-builtins.c @@ -1347,4 +1347,98 @@ return _mm512_maskz_loadu_epi8(__U, __P); } +__m512i test_mm512_dbsad_epu8(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_dbsad_epu8 + // CHECK: @llvm.x86.avx512.mask.dbpsadbw.512 + return _mm512_dbsad_epu8(__A, __B, 4); +} + +__m512i test_mm512_mask_dbsad_epu8(__m512i __W, __mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_dbsad_epu8 + // CHECK: @llvm.x86.avx512.mask.dbpsadbw.512 + return _mm512_mask_dbsad_epu8(__W, __U, __A, __B, 4); +} + +__m512i test_mm512_maskz_dbsad_epu8(__mmask32 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_dbsad_epu8 + // CHECK: @llvm.x86.avx512.mask.dbpsadbw.512 + return _mm512_maskz_dbsad_epu8(__U, __A, __B, 4); +} + +__m512i test_mm512_mask_alignr_epi8(__m512i __W, __mmask64 __U, __m512i 
__A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_alignr_epi8 + // CHECK: @llvm.x86.avx512.mask.palignr.512 + return _mm512_mask_alignr_epi8(__W, __U, __A, __B, 1); +} + +__m512i test_mm512_maskz_alignr_epi8(__mmask64 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_alignr_epi8 + // CHECK: @llvm.x86.avx512.mask.palignr.512 + return _mm512_maskz_alignr_epi8(__U, __A, __B, 2); +} + +__m512i test_mm512_broadcastb_epi8(__m128i __A) { + // CHECK-LABEL: @test_mm512_broadcastb_epi8 + // CHECK: @llvm.x86.avx512.pbroadcastb.512 + return _mm512_broadcastb_epi8(__A); +} + +__m512i test_mm512_mask_broadcastb_epi8(__m512i __O, __mmask64 __M, __m128i __A) { + // CHECK-LABEL: @test_mm512_mask_broadcastb_epi8 + // CHECK: @llvm.x86.avx512.pbroadcastb.512 + return _mm512_mask_broadcastb_epi8(__O, __M, __A); +} + +__m512i test_mm512_maskz_broadcastb_epi8(__mmask64 __M, __m128i __A) { + // CHECK-LABEL: @test_mm512_maskz_broadcastb_epi8 + // CHECK: @llvm.x86.avx512.pbroadcastb.512 + return _mm512_maskz_broadcastb_epi8(__M, __A); +} +__m512i test_mm512_mask_set1_epi16(__m512i __O, __mmask32 __M, short __A) { + // CHECK-LABEL: @test_mm512_mask_set1_epi16 + // CHECK: @llvm.x86.avx512.mask.pbroadcast.w.gpr.512 + return _mm512_mask_set1_epi16(__O, __M, __A); +} + +__m512i test_mm512_maskz_set1_epi16(__mmask32 __M, short __A) { + // CHECK-LABEL: @test_mm512_maskz_set1_epi16 + // CHECK: @llvm.x86.avx512.mask.pbroadcast.w.gpr.512 + return _mm512_maskz_set1_epi16(__M, __A); +} + +__m512i test_mm512_broadcastw_epi16(__m128i __A) { + // CHECK-LABEL: @test_mm512_broadcastw_epi16 + // CHECK: @llvm.x86.avx512.pbroadcastw.512 + return _mm512_broadcastw_epi16(__A); +} + +__m512i test_mm512_mask_broadcastw_epi16(__m512i __O, __mmask32 __M, __m128i __A) { + // CHECK-LABEL: @test_mm512_mask_broadcastw_epi16 + // CHECK: @llvm.x86.avx512.pbroadcastw.512 + return _mm512_mask_broadcastw_epi16(__O, __M, __A); +} + +__m512i test_mm512_maskz_broadcastw_epi16(__mmask32 __M, __m128i __A) 
{ + // CHECK-LABEL: @test_mm512_maskz_broadcastw_epi16 + // CHECK: @llvm.x86.avx512.pbroadcastw.512 + return _mm512_maskz_broadcastw_epi16(__M, __A); +} + +__m512i test_mm512_permutexvar_epi16(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_permutexvar_epi16 + // CHECK: @llvm.x86.avx512.mask.permvar.hi.512 + return _mm512_permutexvar_epi16(__A, __B); +} + +__m512i test_mm512_maskz_permutexvar_epi16(__mmask32 __M, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_permutexvar_epi16 + // CHECK: @llvm.x86.avx512.mask.permvar.hi.512 + return _mm512_maskz_permutexvar_epi16(__M, __A, __B); +} + +__m512i test_mm512_mask_permutexvar_epi16(__m512i __W, __mmask32 __M, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_permutexvar_epi16 + // CHECK: @llvm.x86.avx512.mask.permvar.hi.512 + return _mm512_mask_permutexvar_epi16(__W, __M, __A, __B); +} Index: test/CodeGen/avx512dq-builtins.c =================================================================== --- test/CodeGen/avx512dq-builtins.c +++ test/CodeGen/avx512dq-builtins.c @@ -743,3 +743,261 @@ return _mm512_maskz_reduce_round_ps(__U, __A, 4, 8); } +__m512 test_mm512_broadcast_f32x2(__m128 __A) { + // CHECK-LABEL: @test_mm512_broadcast_f32x2 + // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + return _mm512_broadcast_f32x2(__A); +} + +__m512 test_mm512_mask_broadcast_f32x2(__m512 __O, __mmask16 __M, __m128 __A) { + // CHECK-LABEL: @test_mm512_mask_broadcast_f32x2 + // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + return _mm512_mask_broadcast_f32x2(__O, __M, __A); +} + +__m512 test_mm512_maskz_broadcast_f32x2(__mmask16 __M, __m128 __A) { + // CHECK-LABEL: @test_mm512_maskz_broadcast_f32x2 + // CHECK: @llvm.x86.avx512.mask.broadcastf32x2 + return _mm512_maskz_broadcast_f32x2(__M, __A); +} + +__m512 test_mm512_broadcast_f32x8(__m256 __A) { + // CHECK-LABEL: @test_mm512_broadcast_f32x8 + // CHECK: @llvm.x86.avx512.mask.broadcastf32x8 + return _mm512_broadcast_f32x8(__A); +} + +__m512 
test_mm512_mask_broadcast_f32x8(__m512 __O, __mmask16 __M, __m256 __A) { + // CHECK-LABEL: @test_mm512_mask_broadcast_f32x8 + // CHECK: @llvm.x86.avx512.mask.broadcastf32x8 + return _mm512_mask_broadcast_f32x8(__O, __M, __A); +} + +__m512 test_mm512_maskz_broadcast_f32x8(__mmask16 __M, __m256 __A) { + // CHECK-LABEL: @test_mm512_maskz_broadcast_f32x8 + // CHECK: @llvm.x86.avx512.mask.broadcastf32x8 + return _mm512_maskz_broadcast_f32x8(__M, __A); +} + +__m512d test_mm512_broadcast_f64x2(__m128d __A) { + // CHECK-LABEL: @test_mm512_broadcast_f64x2 + // CHECK: @llvm.x86.avx512.mask.broadcastf64x2 + return _mm512_broadcast_f64x2(__A); +} + +__m512d test_mm512_mask_broadcast_f64x2(__m512d __O, __mmask8 __M, __m128d __A) { + // CHECK-LABEL: @test_mm512_mask_broadcast_f64x2 + // CHECK: @llvm.x86.avx512.mask.broadcastf64x2 + return _mm512_mask_broadcast_f64x2(__O, __M, __A); +} + +__m512d test_mm512_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) { + // CHECK-LABEL: @test_mm512_maskz_broadcast_f64x2 + // CHECK: @llvm.x86.avx512.mask.broadcastf64x2 + return _mm512_maskz_broadcast_f64x2(__M, __A); +} + +__m512i test_mm512_broadcast_i32x2(__m128i __A) { + // CHECK-LABEL: @test_mm512_broadcast_i32x2 + // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + return _mm512_broadcast_i32x2(__A); +} + +__m512i test_mm512_mask_broadcast_i32x2(__m512i __O, __mmask16 __M, __m128i __A) { + // CHECK-LABEL: @test_mm512_mask_broadcast_i32x2 + // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + return _mm512_mask_broadcast_i32x2(__O, __M, __A); +} + +__m512i test_mm512_maskz_broadcast_i32x2(__mmask16 __M, __m128i __A) { + // CHECK-LABEL: @test_mm512_maskz_broadcast_i32x2 + // CHECK: @llvm.x86.avx512.mask.broadcasti32x2 + return _mm512_maskz_broadcast_i32x2(__M, __A); +} + +__m512i test_mm512_broadcast_i32x8(__m256i __A) { + // CHECK-LABEL: @test_mm512_broadcast_i32x8 + // CHECK: @llvm.x86.avx512.mask.broadcasti32x8 + return _mm512_broadcast_i32x8(__A); +} + +__m512i 
test_mm512_mask_broadcast_i32x8(__m512i __O, __mmask16 __M, __m256i __A) { + // CHECK-LABEL: @test_mm512_mask_broadcast_i32x8 + // CHECK: @llvm.x86.avx512.mask.broadcasti32x8 + return _mm512_mask_broadcast_i32x8(__O, __M, __A); +} + +__m512i test_mm512_maskz_broadcast_i32x8(__mmask16 __M, __m256i __A) { + // CHECK-LABEL: @test_mm512_maskz_broadcast_i32x8 + // CHECK: @llvm.x86.avx512.mask.broadcasti32x8 + return _mm512_maskz_broadcast_i32x8(__M, __A); +} + +__m512i test_mm512_broadcast_i64x2(__m128i __A) { + // CHECK-LABEL: @test_mm512_broadcast_i64x2 + // CHECK: @llvm.x86.avx512.mask.broadcasti64x2 + return _mm512_broadcast_i64x2(__A); +} + +__m512i test_mm512_mask_broadcast_i64x2(__m512i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm512_mask_broadcast_i64x2 + // CHECK: @llvm.x86.avx512.mask.broadcasti64x2 + return _mm512_mask_broadcast_i64x2(__O, __M, __A); +} + +__m512i test_mm512_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm512_maskz_broadcast_i64x2 + // CHECK: @llvm.x86.avx512.mask.broadcasti64x2 + return _mm512_maskz_broadcast_i64x2(__M, __A); +} + +__m256 test_mm512_extractf32x8_ps(__m512 __A) { + // CHECK-LABEL: @test_mm512_extractf32x8_ps + // CHECK: @llvm.x86.avx512.mask.vextractf32x8 + return _mm512_extractf32x8_ps(__A, 1); /* imm8 picks one of two 256-bit halves: valid range 0..1 */ +} + +__m256 test_mm512_mask_extractf32x8_ps(__m256 __W, __mmask8 __U, __m512 __A) { + // CHECK-LABEL: @test_mm512_mask_extractf32x8_ps + // CHECK: @llvm.x86.avx512.mask.vextractf32x8 + return _mm512_mask_extractf32x8_ps(__W, __U, __A, 1); +} + +__m256 test_mm512_maskz_extractf32x8_ps(__mmask8 __U, __m512 __A) { + // CHECK-LABEL: @test_mm512_maskz_extractf32x8_ps + // CHECK: @llvm.x86.avx512.mask.vextractf32x8 + return _mm512_maskz_extractf32x8_ps(__U, __A, 1); +} + +__m128d test_mm512_extractf64x2_pd(__m512d __A) { + // CHECK-LABEL: @test_mm512_extractf64x2_pd + // CHECK: @llvm.x86.avx512.mask.vextractf64x2 + return _mm512_extractf64x2_pd(__A, 3); /* imm8 picks one of four 128-bit lanes: valid range 0..3 */ +} + +__m128d 
test_mm512_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m512d __A) { + // CHECK-LABEL: @test_mm512_mask_extractf64x2_pd + // CHECK: @llvm.x86.avx512.mask.vextractf64x2 + return _mm512_mask_extractf64x2_pd(__W, __U, __A, 3); +} + +__m128d test_mm512_maskz_extractf64x2_pd(__mmask8 __U, __m512d __A) { + // CHECK-LABEL: @test_mm512_maskz_extractf64x2_pd + // CHECK: @llvm.x86.avx512.mask.vextractf64x2 + return _mm512_maskz_extractf64x2_pd(__U, __A, 3); +} + +__m256i test_mm512_extracti32x8_epi32(__m512i __A) { + // CHECK-LABEL: @test_mm512_extracti32x8_epi32 + // CHECK: @llvm.x86.avx512.mask.vextracti32x8 + return _mm512_extracti32x8_epi32(__A, 1); /* imm8 picks one of two 256-bit halves: valid range 0..1 */ +} + +__m256i test_mm512_mask_extracti32x8_epi32(__m256i __W, __mmask8 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_extracti32x8_epi32 + // CHECK: @llvm.x86.avx512.mask.vextracti32x8 + return _mm512_mask_extracti32x8_epi32(__W, __U, __A, 1); +} + +__m256i test_mm512_maskz_extracti32x8_epi32(__mmask8 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_extracti32x8_epi32 + // CHECK: @llvm.x86.avx512.mask.vextracti32x8 + return _mm512_maskz_extracti32x8_epi32(__U, __A, 1); +} + +__m128i test_mm512_extracti64x2_epi64(__m512i __A) { + // CHECK-LABEL: @test_mm512_extracti64x2_epi64 + // CHECK: @llvm.x86.avx512.mask.vextracti64x2 + return _mm512_extracti64x2_epi64(__A, 3); /* imm8 picks one of four 128-bit lanes: valid range 0..3 */ +} + +__m128i test_mm512_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_extracti64x2_epi64 + // CHECK: @llvm.x86.avx512.mask.vextracti64x2 + return _mm512_mask_extracti64x2_epi64(__W, __U, __A, 3); +} + +__m128i test_mm512_maskz_extracti64x2_epi64(__mmask8 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_extracti64x2_epi64 + // CHECK: @llvm.x86.avx512.mask.vextracti64x2 + return _mm512_maskz_extracti64x2_epi64(__U, __A, 3); +} + +__mmask8 test_mm512_mask_fpclass_pd_mask(__mmask8 __U, __m512d __A) { + // CHECK-LABEL: @test_mm512_mask_fpclass_pd_mask + // CHECK: 
@llvm.x86.avx512.mask.fpclass.pd.512 + return _mm512_mask_fpclass_pd_mask(__U, __A, 4); +} + +__mmask8 test_mm512_fpclass_pd_mask(__m512d __A) { + // CHECK-LABEL: @test_mm512_fpclass_pd_mask + // CHECK: @llvm.x86.avx512.mask.fpclass.pd.512 + return _mm512_fpclass_pd_mask(__A, 4); +} + +__mmask16 test_mm512_mask_fpclass_ps_mask(__mmask16 __U, __m512 __A) { + // CHECK-LABEL: @test_mm512_mask_fpclass_ps_mask + // CHECK: @llvm.x86.avx512.mask.fpclass.ps.512 + return _mm512_mask_fpclass_ps_mask(__U, __A, 4); +} + +__mmask16 test_mm512_fpclass_ps_mask(__m512 __A) { + // CHECK-LABEL: @test_mm512_fpclass_ps_mask + // CHECK: @llvm.x86.avx512.mask.fpclass.ps.512 + return _mm512_fpclass_ps_mask(__A, 4); +} + +__m512d test_mm512_insertf64x2(__m512d __A, __m128d __B) { + // CHECK-LABEL: @test_mm512_insertf64x2 + // CHECK: @llvm.x86.avx512.mask.insertf64x2 + return _mm512_insertf64x2(__A, __B, 3); +} + +__m512d test_mm512_mask_insertf64x2(__m512d __W, __mmask8 __U, __m512d __A, __m128d __B) { + // CHECK-LABEL: @test_mm512_mask_insertf64x2 + // CHECK: @llvm.x86.avx512.mask.insertf64x2 + return _mm512_mask_insertf64x2(__W, __U, __A, __B, 3); +} + +__m512d test_mm512_maskz_insertf64x2(__mmask8 __U, __m512d __A, __m128d __B) { + // CHECK-LABEL: @test_mm512_maskz_insertf64x2 + // CHECK: @llvm.x86.avx512.mask.insertf64x2 + return _mm512_maskz_insertf64x2(__U, __A, __B, 3); +} + +__m512i test_mm512_inserti32x8(__m512i __A, __m256i __B) { + // CHECK-LABEL: @test_mm512_inserti32x8 + // CHECK: @llvm.x86.avx512.mask.inserti32x8 + return _mm512_inserti32x8(__A, __B, 1); +} + +__m512i test_mm512_mask_inserti32x8(__m512i __W, __mmask16 __U, __m512i __A, __m256i __B) { + // CHECK-LABEL: @test_mm512_mask_inserti32x8 + // CHECK: @llvm.x86.avx512.mask.inserti32x8 + return _mm512_mask_inserti32x8(__W, __U, __A, __B, 1); +} + +__m512i test_mm512_maskz_inserti32x8(__mmask16 __U, __m512i __A, __m256i __B) { + // CHECK-LABEL: @test_mm512_maskz_inserti32x8 + // CHECK: @llvm.x86.avx512.mask.inserti32x8 
+ return _mm512_maskz_inserti32x8(__U, __A, __B, 1); +} + +__m512i test_mm512_inserti64x2(__m512i __A, __m128i __B) { + // CHECK-LABEL: @test_mm512_inserti64x2 + // CHECK: @llvm.x86.avx512.mask.inserti64x2 + return _mm512_inserti64x2(__A, __B, 1); +} + +__m512i test_mm512_mask_inserti64x2(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { + // CHECK-LABEL: @test_mm512_mask_inserti64x2 + // CHECK: @llvm.x86.avx512.mask.inserti64x2 + return _mm512_mask_inserti64x2(__W, __U, __A, __B, 1); +} + +__m512i test_mm512_maskz_inserti64x2(__mmask8 __U, __m512i __A, __m128i __B) { + // CHECK-LABEL: @test_mm512_maskz_inserti64x2 + // CHECK: @llvm.x86.avx512.mask.inserti64x2 + return _mm512_maskz_inserti64x2(__U, __A, __B, 1); +} + Index: test/CodeGen/avx512f-builtins.c =================================================================== --- test/CodeGen/avx512f-builtins.c +++ test/CodeGen/avx512f-builtins.c @@ -177,14 +177,14 @@ { // CHECK-LABEL: @test_mm512_alignr_epi32 // CHECK: @llvm.x86.avx512.mask.valign.d.512 - return _mm512_alignr_epi32(a, b, 2); + return _mm512_alignr_epi32(a, b, 0); } __m512i test_mm512_alignr_epi64(__m512i a, __m512i b) { // CHECK-LABEL: @test_mm512_alignr_epi64 // CHECK: @llvm.x86.avx512.mask.valign.q.512 - return _mm512_alignr_epi64(a, b, 2); + return _mm512_alignr_epi64(a, b, 0); } __m512d test_mm512_broadcastsd_pd(__m128d a) @@ -762,13 +762,13 @@ __mmask16 test_mm512_cmp_round_ps_mask(__m512 a, __m512 b) { // CHECK-LABEL: @test_mm512_cmp_round_ps_mask // CHECK: @llvm.x86.avx512.mask.cmp.ps.512 - return _mm512_cmp_round_ps_mask(a, b, 0, _MM_FROUND_TO_NEAREST_INT); + return _mm512_cmp_round_ps_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION); } __mmask16 test_mm512_mask_cmp_round_ps_mask(__mmask16 m, __m512 a, __m512 b) { // CHECK-LABEL: @test_mm512_mask_cmp_round_ps_mask // CHECK: @llvm.x86.avx512.mask.cmp.ps.512 - return _mm512_mask_cmp_round_ps_mask(m, a, b, 0, _MM_FROUND_TO_NEAREST_INT); + return _mm512_mask_cmp_round_ps_mask(m, a, b, 0, 
_MM_FROUND_CUR_DIRECTION); } __mmask16 test_mm512_cmp_ps_mask(__m512 a, __m512 b) { @@ -786,13 +786,13 @@ __mmask8 test_mm512_cmp_round_pd_mask(__m512d a, __m512d b) { // CHECK-LABEL: @test_mm512_cmp_round_pd_mask // CHECK: @llvm.x86.avx512.mask.cmp.pd.512 - return _mm512_cmp_round_pd_mask(a, b, 0, _MM_FROUND_TO_NEAREST_INT); + return _mm512_cmp_round_pd_mask(a, b, 0, _MM_FROUND_CUR_DIRECTION); } __mmask8 test_mm512_mask_cmp_round_pd_mask(__mmask8 m, __m512d a, __m512d b) { // CHECK-LABEL: @test_mm512_mask_cmp_round_pd_mask // CHECK: @llvm.x86.avx512.mask.cmp.pd.512 - return _mm512_mask_cmp_round_pd_mask(m, a, b, 0, _MM_FROUND_TO_NEAREST_INT); + return _mm512_mask_cmp_round_pd_mask(m, a, b, 0, _MM_FROUND_CUR_DIRECTION); } __mmask8 test_mm512_cmp_pd_mask(__m512d a, __m512d b) { @@ -2576,5 +2576,938 @@ return _mm512_kmov(__A); } +__mmask8 test_mm_cmp_round_sd_mask(__m128d __X, __m128d __Y) { + // CHECK-LABEL: @test_mm_cmp_round_sd_mask + // CHECK: @llvm.x86.avx512.mask.cmp + return _mm_cmp_round_sd_mask(__X, __Y, 5, _MM_FROUND_CUR_DIRECTION); +} + +__mmask8 test_mm_mask_cmp_round_sd_mask(__mmask8 __M, __m128d __X, __m128d __Y) { + // CHECK-LABEL: @test_mm_mask_cmp_round_sd_mask + // CHECK: @llvm.x86.avx512.mask.cmp + return _mm_mask_cmp_round_sd_mask(__M, __X, __Y, 5, _MM_FROUND_CUR_DIRECTION); +} + +__mmask8 test_mm_cmp_sd_mask(__m128d __X, __m128d __Y) { + // CHECK-LABEL: @test_mm_cmp_sd_mask + // CHECK: @llvm.x86.avx512.mask.cmp + return _mm_cmp_sd_mask(__X, __Y, 5); +} + +__mmask8 test_mm_mask_cmp_sd_mask(__mmask8 __M, __m128d __X, __m128d __Y) { + // CHECK-LABEL: @test_mm_mask_cmp_sd_mask + // CHECK: @llvm.x86.avx512.mask.cmp + return _mm_mask_cmp_sd_mask(__M, __X, __Y, 5); +} + +__mmask8 test_mm_cmp_round_ss_mask(__m128 __X, __m128 __Y) { + // CHECK-LABEL: @test_mm_cmp_round_ss_mask + // CHECK: @llvm.x86.avx512.mask.cmp + return _mm_cmp_round_ss_mask(__X, __Y, 5, _MM_FROUND_CUR_DIRECTION); +} + +__mmask8 test_mm_mask_cmp_round_ss_mask(__mmask8 __M, __m128 __X, 
__m128 __Y) { + // CHECK-LABEL: @test_mm_mask_cmp_round_ss_mask + // CHECK: @llvm.x86.avx512.mask.cmp + return _mm_mask_cmp_round_ss_mask(__M, __X, __Y, 5, _MM_FROUND_CUR_DIRECTION); +} + +__mmask8 test_mm_cmp_ss_mask(__m128 __X, __m128 __Y) { + // CHECK-LABEL: @test_mm_cmp_ss_mask + // CHECK: @llvm.x86.avx512.mask.cmp + return _mm_cmp_ss_mask(__X, __Y, 5); +} + +__mmask8 test_mm_mask_cmp_ss_mask(__mmask8 __M, __m128 __X, __m128 __Y) { + // CHECK-LABEL: @test_mm_mask_cmp_ss_mask + // CHECK: @llvm.x86.avx512.mask.cmp + return _mm_mask_cmp_ss_mask(__M, __X, __Y, 5); +} + +__m512d test_mm512_mask_compress_pd(__m512d __W, __mmask8 __U, __m512d __A) { + // CHECK-LABEL: @test_mm512_mask_compress_pd + // CHECK: @llvm.x86.avx512.mask.compress.pd.512 + return _mm512_mask_compress_pd(__W, __U, __A); +} + +__m512d test_mm512_maskz_compress_pd(__mmask8 __U, __m512d __A) { + // CHECK-LABEL: @test_mm512_maskz_compress_pd + // CHECK: @llvm.x86.avx512.mask.compress.pd.512 + return _mm512_maskz_compress_pd(__U, __A); +} + +__m512i test_mm512_mask_compress_epi64(__m512i __W, __mmask8 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_compress_epi64 + // CHECK: @llvm.x86.avx512.mask.compress.q.512 + return _mm512_mask_compress_epi64(__W, __U, __A); +} + +__m512i test_mm512_maskz_compress_epi64(__mmask8 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_compress_epi64 + // CHECK: @llvm.x86.avx512.mask.compress.q.512 + return _mm512_maskz_compress_epi64(__U, __A); +} + +__m512 test_mm512_mask_compress_ps(__m512 __W, __mmask16 __U, __m512 __A) { + // CHECK-LABEL: @test_mm512_mask_compress_ps + // CHECK: @llvm.x86.avx512.mask.compress.ps.512 + return _mm512_mask_compress_ps(__W, __U, __A); +} + +__m512 test_mm512_maskz_compress_ps(__mmask16 __U, __m512 __A) { + // CHECK-LABEL: @test_mm512_maskz_compress_ps + // CHECK: @llvm.x86.avx512.mask.compress.ps.512 + return _mm512_maskz_compress_ps(__U, __A); +} + +__m512i test_mm512_mask_compress_epi32(__m512i __W, __mmask16 __U, __m512i 
__A) { + // CHECK-LABEL: @test_mm512_mask_compress_epi32 + // CHECK: @llvm.x86.avx512.mask.compress.d.512 + return _mm512_mask_compress_epi32(__W, __U, __A); +} + +__m512i test_mm512_maskz_compress_epi32(__mmask16 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_compress_epi32 + // CHECK: @llvm.x86.avx512.mask.compress.d.512 + return _mm512_maskz_compress_epi32(__U, __A); +} + +__m512d test_mm512_cvt_roundps_pd(__m256 __A) { + // CHECK-LABEL: @test_mm512_cvt_roundps_pd + // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 + return _mm512_cvt_roundps_pd(__A, _MM_FROUND_CUR_DIRECTION); +} + +__m512d test_mm512_mask_cvt_roundps_pd(__m512d __W, __mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm512_mask_cvt_roundps_pd + // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 + return _mm512_mask_cvt_roundps_pd(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512d test_mm512_maskz_cvt_roundps_pd(__mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm512_maskz_cvt_roundps_pd + // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 + return _mm512_maskz_cvt_roundps_pd(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512d test_mm512_cvtps_pd(__m256 __A) { + // CHECK-LABEL: @test_mm512_cvtps_pd + // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 + return _mm512_cvtps_pd(__A); +} + +__m512d test_mm512_mask_cvtps_pd(__m512d __W, __mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm512_mask_cvtps_pd + // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 + return _mm512_mask_cvtps_pd(__W, __U, __A); +} + +__m512d test_mm512_maskz_cvtps_pd(__mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtps_pd + // CHECK: @llvm.x86.avx512.mask.cvtps2pd.512 + return _mm512_maskz_cvtps_pd(__U, __A); +} + +__m256i test_mm512_cvtt_roundpd_epu32(__m512d __A) { + // CHECK-LABEL: @test_mm512_cvtt_roundpd_epu32 + // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 + return _mm512_cvtt_roundpd_epu32(__A, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm512_mask_cvtt_roundpd_epu32(__m256i __W, __mmask8 __U, __m512d __A) { + 
// CHECK-LABEL: @test_mm512_mask_cvtt_roundpd_epu32 + // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 + return _mm512_mask_cvtt_roundpd_epu32(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm512_maskz_cvtt_roundpd_epu32(__mmask8 __U, __m512d __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtt_roundpd_epu32 + // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 + return _mm512_maskz_cvtt_roundpd_epu32(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m256i test_mm512_cvttpd_epu32(__m512d __A) { + // CHECK-LABEL: @test_mm512_cvttpd_epu32 + // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 + return _mm512_cvttpd_epu32(__A); +} + +__m256i test_mm512_mask_cvttpd_epu32(__m256i __W, __mmask8 __U, __m512d __A) { + // CHECK-LABEL: @test_mm512_mask_cvttpd_epu32 + // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 + return _mm512_mask_cvttpd_epu32(__W, __U, __A); +} + +__m256i test_mm512_maskz_cvttpd_epu32(__mmask8 __U, __m512d __A) { + // CHECK-LABEL: @test_mm512_maskz_cvttpd_epu32 + // CHECK: @llvm.x86.avx512.mask.cvttpd2udq.512 + return _mm512_maskz_cvttpd_epu32(__U, __A); +} + +__m512d test_mm512_mask_expand_pd(__m512d __W, __mmask8 __U, __m512d __A) { + // CHECK-LABEL: @test_mm512_mask_expand_pd + // CHECK: @llvm.x86.avx512.mask.expand.pd.512 + return _mm512_mask_expand_pd(__W, __U, __A); +} + +__m512i test_mm512_mask_expand_epi64(__m512i __W, __mmask8 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_expand_epi64 + // CHECK: @llvm.x86.avx512.mask.expand.q.512 + return _mm512_mask_expand_epi64(__W, __U, __A); +} + +__m512d test_mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) { + // CHECK-LABEL: @test_mm512_mask_expandloadu_pd + // CHECK: @llvm.x86.avx512.mask.expand.load.pd.512 + return _mm512_mask_expandloadu_pd(__W, __U, __P); +} + +__m512i test_mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) { + // CHECK-LABEL: @test_mm512_mask_expandloadu_epi64 + // CHECK: @llvm.x86.avx512.mask.expand.load.q.512 + return 
_mm512_mask_expandloadu_epi64(__W, __U, __P); +} + +__m512 test_mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) { + // CHECK-LABEL: @test_mm512_mask_expandloadu_ps + // CHECK: @llvm.x86.avx512.mask.expand.load.ps.512 + return _mm512_mask_expandloadu_ps(__W, __U, __P); +} + +__m512i test_mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) { + // CHECK-LABEL: @test_mm512_mask_expandloadu_epi32 + // CHECK: @llvm.x86.avx512.mask.expand.load.d.512 + return _mm512_mask_expandloadu_epi32(__W, __U, __P); +} + +__m512 test_mm512_mask_expand_ps(__m512 __W, __mmask16 __U, __m512 __A) { + // CHECK-LABEL: @test_mm512_mask_expand_ps + // CHECK: @llvm.x86.avx512.mask.expand.ps.512 + return _mm512_mask_expand_ps(__W, __U, __A); +} + +__m512i test_mm512_mask_expand_epi32(__m512i __W, __mmask16 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_expand_epi32 + // CHECK: @llvm.x86.avx512.mask.expand.d.512 + return _mm512_mask_expand_epi32(__W, __U, __A); +} + +__m128i test_mm512_extracti32x4_epi32(__m512i __A) { + // CHECK-LABEL: @test_mm512_extracti32x4_epi32 + // CHECK: @llvm.x86.avx512.mask.vextracti32x4 + return _mm512_extracti32x4_epi32(__A, 0); +} + +__m128i test_mm512_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_extracti32x4_epi32 + // CHECK: @llvm.x86.avx512.mask.vextracti32x4 + return _mm512_mask_extracti32x4_epi32(__W, __U, __A, 0); +} + +__m128i test_mm512_maskz_extracti32x4_epi32(__mmask8 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_extracti32x4_epi32 + // CHECK: @llvm.x86.avx512.mask.vextracti32x4 + return _mm512_maskz_extracti32x4_epi32(__U, __A, 0); +} + +__m256i test_mm512_extracti64x4_epi64(__m512i __A) { + // CHECK-LABEL: @test_mm512_extracti64x4_epi64 + // CHECK: @llvm.x86.avx512.mask.vextracti64x4 + return _mm512_extracti64x4_epi64(__A, 0); +} + +__m256i test_mm512_mask_extracti64x4_epi64(__m256i __W, __mmask8 __U, __m512i __A) { + // CHECK-LABEL: 
@test_mm512_mask_extracti64x4_epi64 + // CHECK: @llvm.x86.avx512.mask.vextracti64x4 + return _mm512_mask_extracti64x4_epi64(__W, __U, __A, 0); +} + +__m256i test_mm512_maskz_extracti64x4_epi64(__mmask8 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_extracti64x4_epi64 + // CHECK: @llvm.x86.avx512.mask.vextracti64x4 + return _mm512_maskz_extracti64x4_epi64(__U, __A, 0); +} + +__m512d test_mm512_getexp_round_pd(__m512d __A) { + // CHECK-LABEL: @test_mm512_getexp_round_pd + // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 + return _mm512_getexp_round_pd(__A, _MM_FROUND_CUR_DIRECTION); +} + +__m512d test_mm512_mask_getexp_round_pd(__m512d __W, __mmask8 __U, __m512d __A) { + // CHECK-LABEL: @test_mm512_mask_getexp_round_pd + // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 + return _mm512_mask_getexp_round_pd(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512d test_mm512_maskz_getexp_round_pd(__mmask8 __U, __m512d __A) { + // CHECK-LABEL: @test_mm512_maskz_getexp_round_pd + // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 + return _mm512_maskz_getexp_round_pd(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512d test_mm512_getexp_pd(__m512d __A) { + // CHECK-LABEL: @test_mm512_getexp_pd + // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 + return _mm512_getexp_pd(__A); +} + +__m512d test_mm512_mask_getexp_pd(__m512d __W, __mmask8 __U, __m512d __A) { + // CHECK-LABEL: @test_mm512_mask_getexp_pd + // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 + return _mm512_mask_getexp_pd(__W, __U, __A); +} + +__m512d test_mm512_maskz_getexp_pd(__mmask8 __U, __m512d __A) { + // CHECK-LABEL: @test_mm512_maskz_getexp_pd + // CHECK: @llvm.x86.avx512.mask.getexp.pd.512 + return _mm512_maskz_getexp_pd(__U, __A); +} + +__m512 test_mm512_getexp_round_ps(__m512 __A) { + // CHECK-LABEL: @test_mm512_getexp_round_ps + // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 + return _mm512_getexp_round_ps(__A, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_mask_getexp_round_ps(__m512 __W, __mmask16 __U, 
__m512 __A) { + // CHECK-LABEL: @test_mm512_mask_getexp_round_ps + // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 + return _mm512_mask_getexp_round_ps(__W, __U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_maskz_getexp_round_ps(__mmask16 __U, __m512 __A) { + // CHECK-LABEL: @test_mm512_maskz_getexp_round_ps + // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 + return _mm512_maskz_getexp_round_ps(__U, __A, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_getexp_ps(__m512 __A) { + // CHECK-LABEL: @test_mm512_getexp_ps + // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 + return _mm512_getexp_ps(__A); +} + +__m512 test_mm512_mask_getexp_ps(__m512 __W, __mmask16 __U, __m512 __A) { + // CHECK-LABEL: @test_mm512_mask_getexp_ps + // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 + return _mm512_mask_getexp_ps(__W, __U, __A); +} + +__m512 test_mm512_maskz_getexp_ps(__mmask16 __U, __m512 __A) { + // CHECK-LABEL: @test_mm512_maskz_getexp_ps + // CHECK: @llvm.x86.avx512.mask.getexp.ps.512 + return _mm512_maskz_getexp_ps(__U, __A); +} + +__m512d test_mm512_getmant_round_pd(__m512d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm512_getmant_round_pd + // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 + return _mm512_getmant_round_pd(__A, 1, 1, _MM_FROUND_CUR_DIRECTION); +} + +__m512d test_mm512_mask_getmant_round_pd(__m512d __W, __mmask8 __U, __m512d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm512_mask_getmant_round_pd + // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 + return _mm512_mask_getmant_round_pd(__W, __U, __A, 1, 1, _MM_FROUND_CUR_DIRECTION); +} + +__m512d test_mm512_maskz_getmant_round_pd(__mmask8 __U, __m512d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm512_maskz_getmant_round_pd + // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 + return _mm512_maskz_getmant_round_pd(__U, __A, 1, 1, _MM_FROUND_CUR_DIRECTION); +} + +__m512d 
test_mm512_getmant_pd(__m512d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm512_getmant_pd + // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 + return _mm512_getmant_pd(__A, 1, 2); +} + +__m512d test_mm512_mask_getmant_pd(__m512d __W, __mmask8 __U, __m512d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm512_mask_getmant_pd + // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 + return _mm512_mask_getmant_pd(__W, __U, __A, 1, 2); +} + +__m512d test_mm512_maskz_getmant_pd(__mmask8 __U, __m512d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm512_maskz_getmant_pd + // CHECK: @llvm.x86.avx512.mask.getmant.pd.512 + return _mm512_maskz_getmant_pd(__U, __A, 1, 2); +} + +__m512 test_mm512_getmant_round_ps(__m512 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm512_getmant_round_ps + // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 + return _mm512_getmant_round_ps(__A, 1, 2, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_mask_getmant_round_ps(__m512 __W, __mmask16 __U, __m512 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm512_mask_getmant_round_ps + // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 + return _mm512_mask_getmant_round_ps(__W, __U, __A, 1, 2, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_maskz_getmant_round_ps(__mmask16 __U, __m512 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm512_maskz_getmant_round_ps + // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 + return _mm512_maskz_getmant_round_ps(__U, __A, 1, 2, _MM_FROUND_CUR_DIRECTION); +} + +__m512 test_mm512_getmant_ps(__m512 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm512_getmant_ps + // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 + return _mm512_getmant_ps(__A, 1, 2); +} + +__m512 test_mm512_mask_getmant_ps(__m512 __W, 
__mmask16 __U, __m512 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm512_mask_getmant_ps + // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 + return _mm512_mask_getmant_ps(__W, __U, __A, 1, 2); +} + +__m512 test_mm512_maskz_getmant_ps(__mmask16 __U, __m512 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm512_maskz_getmant_ps + // CHECK: @llvm.x86.avx512.mask.getmant.ps.512 + return _mm512_maskz_getmant_ps(__U, __A, 1, 2); +} + +__m512d test_mm512_insertf64x4(__m512d __A, __m256d __B) { + // CHECK-LABEL: @test_mm512_insertf64x4 + // CHECK: @llvm.x86.avx512.mask.insertf64x4 + return _mm512_insertf64x4(__A, __B, 0); +} + +__m512d test_mm512_mask_insertf64x4(__m512d __W, __mmask8 __U, __m512d __A, __m256d __B) { + // CHECK-LABEL: @test_mm512_mask_insertf64x4 + // CHECK: @llvm.x86.avx512.mask.insertf64x4 + return _mm512_mask_insertf64x4(__W, __U, __A, __B, 0); +} + +__m512d test_mm512_maskz_insertf64x4(__mmask8 __U, __m512d __A, __m256d __B) { + // CHECK-LABEL: @test_mm512_maskz_insertf64x4 + // CHECK: @llvm.x86.avx512.mask.insertf64x4 + return _mm512_maskz_insertf64x4(__U, __A, __B, 0); +} + +__m512i test_mm512_inserti64x4(__m512i __A, __m256i __B) { + // CHECK-LABEL: @test_mm512_inserti64x4 + // CHECK: @llvm.x86.avx512.mask.inserti64x4 + return _mm512_inserti64x4(__A, __B, 0); +} + +__m512i test_mm512_mask_inserti64x4(__m512i __W, __mmask8 __U, __m512i __A, __m256i __B) { + // CHECK-LABEL: @test_mm512_mask_inserti64x4 + // CHECK: @llvm.x86.avx512.mask.inserti64x4 + return _mm512_mask_inserti64x4(__W, __U, __A, __B, 0); +} + +__m512i test_mm512_maskz_inserti64x4(__mmask8 __U, __m512i __A, __m256i __B) { + // CHECK-LABEL: @test_mm512_maskz_inserti64x4 + // CHECK: @llvm.x86.avx512.mask.inserti64x4 + return _mm512_maskz_inserti64x4(__U, __A, __B, 0); +} + +__m512 test_mm512_movehdup_ps(__m512 __A) { + // CHECK-LABEL: @test_mm512_movehdup_ps + // CHECK: @llvm.x86.avx512.mask.movshdup.512 + 
return _mm512_movehdup_ps(__A); +} + +__m512 test_mm512_mask_movehdup_ps(__m512 __W, __mmask16 __U, __m512 __A) { + // CHECK-LABEL: @test_mm512_mask_movehdup_ps + // CHECK: @llvm.x86.avx512.mask.movshdup.512 + return _mm512_mask_movehdup_ps(__W, __U, __A); +} + +__m512 test_mm512_maskz_movehdup_ps(__mmask16 __U, __m512 __A) { + // CHECK-LABEL: @test_mm512_maskz_movehdup_ps + // CHECK: @llvm.x86.avx512.mask.movshdup.512 + return _mm512_maskz_movehdup_ps(__U, __A); +} + +__m512 test_mm512_moveldup_ps(__m512 __A) { + // CHECK-LABEL: @test_mm512_moveldup_ps + // CHECK: @llvm.x86.avx512.mask.movsldup.512 + return _mm512_moveldup_ps(__A); +} + +__m512 test_mm512_mask_moveldup_ps(__m512 __W, __mmask16 __U, __m512 __A) { + // CHECK-LABEL: @test_mm512_mask_moveldup_ps + // CHECK: @llvm.x86.avx512.mask.movsldup.512 + return _mm512_mask_moveldup_ps(__W, __U, __A); +} + +__m512 test_mm512_maskz_moveldup_ps(__mmask16 __U, __m512 __A) { + // CHECK-LABEL: @test_mm512_maskz_moveldup_ps + // CHECK: @llvm.x86.avx512.mask.movsldup.512 + return _mm512_maskz_moveldup_ps(__U, __A); +} + +__m512d test_mm512_permutex_pd(__m512d __X) { + // CHECK-LABEL: @test_mm512_permutex_pd + // CHECK: @llvm.x86.avx512.mask.perm.df.512 + return _mm512_permutex_pd(__X, 0); +} + +__m512d test_mm512_mask_permutex_pd(__m512d __W, __mmask8 __U, __m512d __X) { + // CHECK-LABEL: @test_mm512_mask_permutex_pd + // CHECK: @llvm.x86.avx512.mask.perm.df.512 + return _mm512_mask_permutex_pd(__W, __U, __X, 0); +} + +__m512d test_mm512_maskz_permutex_pd(__mmask8 __U, __m512d __X) { + // CHECK-LABEL: @test_mm512_maskz_permutex_pd + // CHECK: @llvm.x86.avx512.mask.perm.df.512 + return _mm512_maskz_permutex_pd(__U, __X, 0); +} + +__m512i test_mm512_permutex_epi64(__m512i __X) { + // CHECK-LABEL: @test_mm512_permutex_epi64 + // CHECK: @llvm.x86.avx512.mask.perm.di.512 + return _mm512_permutex_epi64(__X, 0); +} + +__m512i test_mm512_mask_permutex_epi64(__m512i __W, __mmask8 __M, __m512i __X) { + // CHECK-LABEL: 
@test_mm512_mask_permutex_epi64 + // CHECK: @llvm.x86.avx512.mask.perm.di.512 + return _mm512_mask_permutex_epi64(__W, __M, __X, 0); +} + +__m512i test_mm512_maskz_permutex_epi64(__mmask8 __M, __m512i __X) { + // CHECK-LABEL: @test_mm512_maskz_permutex_epi64 + // CHECK: @llvm.x86.avx512.mask.perm.di.512 + return _mm512_maskz_permutex_epi64(__M, __X, 0); +} + +__m512d test_mm512_permutexvar_pd(__m512i __X, __m512d __Y) { + // CHECK-LABEL: @test_mm512_permutexvar_pd + // CHECK: @llvm.x86.avx512.mask.permvar.df.512 + return _mm512_permutexvar_pd(__X, __Y); +} + +__m512d test_mm512_mask_permutexvar_pd(__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) { + // CHECK-LABEL: @test_mm512_mask_permutexvar_pd + // CHECK: @llvm.x86.avx512.mask.permvar.df.512 + return _mm512_mask_permutexvar_pd(__W, __U, __X, __Y); +} + +__m512d test_mm512_maskz_permutexvar_pd(__mmask8 __U, __m512i __X, __m512d __Y) { + // CHECK-LABEL: @test_mm512_maskz_permutexvar_pd + // CHECK: @llvm.x86.avx512.mask.permvar.df.512 + return _mm512_maskz_permutexvar_pd(__U, __X, __Y); +} + +__m512i test_mm512_maskz_permutexvar_epi64(__mmask8 __M, __m512i __X, __m512i __Y) { + // CHECK-LABEL: @test_mm512_maskz_permutexvar_epi64 + // CHECK: @llvm.x86.avx512.mask.permvar.di.512 + return _mm512_maskz_permutexvar_epi64(__M, __X, __Y); +} + +__m512i test_mm512_permutexvar_epi64(__m512i __X, __m512i __Y) { + // CHECK-LABEL: @test_mm512_permutexvar_epi64 + // CHECK: @llvm.x86.avx512.mask.permvar.di.512 + return _mm512_permutexvar_epi64(__X, __Y); +} + +__m512i test_mm512_mask_permutexvar_epi64(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { + // CHECK-LABEL: @test_mm512_mask_permutexvar_epi64 + // CHECK: @llvm.x86.avx512.mask.permvar.di.512 + return _mm512_mask_permutexvar_epi64(__W, __M, __X, __Y); +} + +__m512 test_mm512_permutexvar_ps(__m512i __X, __m512 __Y) { + // CHECK-LABEL: @test_mm512_permutexvar_ps + // CHECK: @llvm.x86.avx512.mask.permvar.sf.512 + return _mm512_permutexvar_ps(__X, __Y); +} + +__m512 
test_mm512_mask_permutexvar_ps(__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) { + // CHECK-LABEL: @test_mm512_mask_permutexvar_ps + // CHECK: @llvm.x86.avx512.mask.permvar.sf.512 + return _mm512_mask_permutexvar_ps(__W, __U, __X, __Y); +} + +__m512 test_mm512_maskz_permutexvar_ps(__mmask16 __U, __m512i __X, __m512 __Y) { + // CHECK-LABEL: @test_mm512_maskz_permutexvar_ps + // CHECK: @llvm.x86.avx512.mask.permvar.sf.512 + return _mm512_maskz_permutexvar_ps(__U, __X, __Y); +} + +__m512i test_mm512_maskz_permutexvar_epi32(__mmask16 __M, __m512i __X, __m512i __Y) { + // CHECK-LABEL: @test_mm512_maskz_permutexvar_epi32 + // CHECK: @llvm.x86.avx512.mask.permvar.si.512 + return _mm512_maskz_permutexvar_epi32(__M, __X, __Y); +} + +__m512i test_mm512_permutexvar_epi32(__m512i __X, __m512i __Y) { + // CHECK-LABEL: @test_mm512_permutexvar_epi32 + // CHECK: @llvm.x86.avx512.mask.permvar.si.512 + return _mm512_permutexvar_epi32(__X, __Y); +} + +__m512i test_mm512_mask_permutexvar_epi32(__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y) { + // CHECK-LABEL: @test_mm512_mask_permutexvar_epi32 + // CHECK: @llvm.x86.avx512.mask.permvar.si.512 + return _mm512_mask_permutexvar_epi32(__W, __M, __X, __Y); +} + +__m128i test_mm512_cvtepi32_epi8(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.db.512 + return _mm512_cvtepi32_epi8(__A); +} + +__m128i test_mm512_mask_cvtepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.db.512 + return _mm512_mask_cvtepi32_epi8(__O, __M, __A); +} + +__m128i test_mm512_maskz_cvtepi32_epi8(__mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.db.512 + return _mm512_maskz_cvtepi32_epi8(__M, __A); +} + +void test_mm512_mask_cvtepi32_storeu_epi8(void * __P, __mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtepi32_storeu_epi8 + // CHECK: 
@llvm.x86.avx512.mask.pmov.db.mem.512 + return _mm512_mask_cvtepi32_storeu_epi8(__P, __M, __A); +} + +__m256i test_mm512_cvtepi32_epi16(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.dw.512 + return _mm512_cvtepi32_epi16(__A); +} + +__m256i test_mm512_mask_cvtepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.dw.512 + return _mm512_mask_cvtepi32_epi16(__O, __M, __A); +} + +__m256i test_mm512_maskz_cvtepi32_epi16(__mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.dw.512 + return _mm512_maskz_cvtepi32_epi16(__M, __A); +} + +void test_mm512_mask_cvtepi32_storeu_epi16(void * __P, __mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtepi32_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.dw.mem.512 + return _mm512_mask_cvtepi32_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm512_cvtepi64_epi8(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.qb.512 + return _mm512_cvtepi64_epi8(__A); +} + +__m128i test_mm512_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.qb.512 + return _mm512_mask_cvtepi64_epi8(__O, __M, __A); +} + +__m128i test_mm512_maskz_cvtepi64_epi8(__mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.qb.512 + return _mm512_maskz_cvtepi64_epi8(__M, __A); +} + +void test_mm512_mask_cvtepi64_storeu_epi8(void * __P, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtepi64_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.qb.mem.512 + return _mm512_mask_cvtepi64_storeu_epi8(__P, __M, __A); +} + +__m256i test_mm512_cvtepi64_epi32(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtepi64_epi32 + // CHECK: 
@llvm.x86.avx512.mask.pmov.qd.512 + return _mm512_cvtepi64_epi32(__A); +} + +__m256i test_mm512_mask_cvtepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmov.qd.512 + return _mm512_mask_cvtepi64_epi32(__O, __M, __A); +} + +__m256i test_mm512_maskz_cvtepi64_epi32(__mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmov.qd.512 + return _mm512_maskz_cvtepi64_epi32(__M, __A); +} + +void test_mm512_mask_cvtepi64_storeu_epi32(void* __P, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtepi64_storeu_epi32 + // CHECK: @llvm.x86.avx512.mask.pmov.qd.mem.512 + return _mm512_mask_cvtepi64_storeu_epi32(__P, __M, __A); +} + +__m128i test_mm512_cvtepi64_epi16(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.qw.512 + return _mm512_cvtepi64_epi16(__A); +} + +__m128i test_mm512_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.qw.512 + return _mm512_mask_cvtepi64_epi16(__O, __M, __A); +} + +__m128i test_mm512_maskz_cvtepi64_epi16(__mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.qw.512 + return _mm512_maskz_cvtepi64_epi16(__M, __A); +} + +void test_mm512_mask_cvtepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtepi64_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.qw.mem.512 + return _mm512_mask_cvtepi64_storeu_epi16(__P, __M, __A); +} +__m128i test_mm512_cvtsepi32_epi8(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtsepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.db.512 + return _mm512_cvtsepi32_epi8(__A); +} + +__m128i test_mm512_mask_cvtsepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtsepi32_epi8 + // 
CHECK: @llvm.x86.avx512.mask.pmovs.db.512 + return _mm512_mask_cvtsepi32_epi8(__O, __M, __A); +} + +__m128i test_mm512_maskz_cvtsepi32_epi8(__mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtsepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.db.512 + return _mm512_maskz_cvtsepi32_epi8(__M, __A); +} + +void test_mm512_mask_cvtsepi32_storeu_epi8(void * __P, __mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtsepi32_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.db.mem.512 + return _mm512_mask_cvtsepi32_storeu_epi8(__P, __M, __A); +} + +__m256i test_mm512_cvtsepi32_epi16(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtsepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.dw.512 + return _mm512_cvtsepi32_epi16(__A); +} + +__m256i test_mm512_mask_cvtsepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtsepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.dw.512 + return _mm512_mask_cvtsepi32_epi16(__O, __M, __A); +} + +__m256i test_mm512_maskz_cvtsepi32_epi16(__mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtsepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.dw.512 + return _mm512_maskz_cvtsepi32_epi16(__M, __A); +} + +void test_mm512_mask_cvtsepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtsepi32_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.dw.mem.512 + return _mm512_mask_cvtsepi32_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm512_cvtsepi64_epi8(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtsepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.qb.512 + return _mm512_cvtsepi64_epi8(__A); +} + +__m128i test_mm512_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtsepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.qb.512 + return _mm512_mask_cvtsepi64_epi8(__O, __M, __A); +} + +__m128i test_mm512_maskz_cvtsepi64_epi8(__mmask8 __M, __m512i __A) { + // CHECK-LABEL: 
@test_mm512_maskz_cvtsepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.qb.512 + return _mm512_maskz_cvtsepi64_epi8(__M, __A); +} + +void test_mm512_mask_cvtsepi64_storeu_epi8(void * __P, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtsepi64_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.qb.mem.512 + return _mm512_mask_cvtsepi64_storeu_epi8(__P, __M, __A); +} + +__m256i test_mm512_cvtsepi64_epi32(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtsepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovs.qd.512 + return _mm512_cvtsepi64_epi32(__A); +} + +__m256i test_mm512_mask_cvtsepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtsepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovs.qd.512 + return _mm512_mask_cvtsepi64_epi32(__O, __M, __A); +} + +__m256i test_mm512_maskz_cvtsepi64_epi32(__mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtsepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovs.qd.512 + return _mm512_maskz_cvtsepi64_epi32(__M, __A); +} + +void test_mm512_mask_cvtsepi64_storeu_epi32(void *__P, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtsepi64_storeu_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovs.qd.mem.512 + return _mm512_mask_cvtsepi64_storeu_epi32(__P, __M, __A); +} + +__m128i test_mm512_cvtsepi64_epi16(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtsepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.qw.512 + return _mm512_cvtsepi64_epi16(__A); +} + +__m128i test_mm512_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtsepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.qw.512 + return _mm512_mask_cvtsepi64_epi16(__O, __M, __A); +} + +__m128i test_mm512_maskz_cvtsepi64_epi16(__mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtsepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.qw.512 + return _mm512_maskz_cvtsepi64_epi16(__M, __A); +} + +void test_mm512_mask_cvtsepi64_storeu_epi16(void 
* __P, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtsepi64_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.qw.mem.512 + return _mm512_mask_cvtsepi64_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm512_cvtusepi32_epi8(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtusepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.db.512 + return _mm512_cvtusepi32_epi8(__A); +} + +__m128i test_mm512_mask_cvtusepi32_epi8(__m128i __O, __mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtusepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.db.512 + return _mm512_mask_cvtusepi32_epi8(__O, __M, __A); +} + +__m128i test_mm512_maskz_cvtusepi32_epi8(__mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtusepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.db.512 + return _mm512_maskz_cvtusepi32_epi8(__M, __A); +} + +void test_mm512_mask_cvtusepi32_storeu_epi8(void * __P, __mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtusepi32_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.db.mem.512 + return _mm512_mask_cvtusepi32_storeu_epi8(__P, __M, __A); +} + +__m256i test_mm512_cvtusepi32_epi16(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtusepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.dw.512 + return _mm512_cvtusepi32_epi16(__A); +} + +__m256i test_mm512_mask_cvtusepi32_epi16(__m256i __O, __mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtusepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.dw.512 + return _mm512_mask_cvtusepi32_epi16(__O, __M, __A); +} + +__m256i test_mm512_maskz_cvtusepi32_epi16(__mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtusepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.dw.512 + return _mm512_maskz_cvtusepi32_epi16(__M, __A); +} + +void test_mm512_mask_cvtusepi32_storeu_epi16(void *__P, __mmask16 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtusepi32_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.dw.mem.512 + return 
_mm512_mask_cvtusepi32_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm512_cvtusepi64_epi8(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtusepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.qb.512 + return _mm512_cvtusepi64_epi8(__A); +} + +__m128i test_mm512_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtusepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.qb.512 + return _mm512_mask_cvtusepi64_epi8(__O, __M, __A); +} + +__m128i test_mm512_maskz_cvtusepi64_epi8(__mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtusepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.qb.512 + return _mm512_maskz_cvtusepi64_epi8(__M, __A); +} + +void test_mm512_mask_cvtusepi64_storeu_epi8(void * __P, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtusepi64_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.qb.mem.512 + return _mm512_mask_cvtusepi64_storeu_epi8(__P, __M, __A); +} + +__m256i test_mm512_cvtusepi64_epi32(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtusepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovus.qd.512 + return _mm512_cvtusepi64_epi32(__A); +} + +__m256i test_mm512_mask_cvtusepi64_epi32(__m256i __O, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtusepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovus.qd.512 + return _mm512_mask_cvtusepi64_epi32(__O, __M, __A); +} + +__m256i test_mm512_maskz_cvtusepi64_epi32(__mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtusepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovus.qd.512 + return _mm512_maskz_cvtusepi64_epi32(__M, __A); +} + +void test_mm512_mask_cvtusepi64_storeu_epi32(void* __P, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtusepi64_storeu_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovus.qd.mem.512 + return _mm512_mask_cvtusepi64_storeu_epi32(__P, __M, __A); +} + +__m128i test_mm512_cvtusepi64_epi16(__m512i __A) { + // CHECK-LABEL: @test_mm512_cvtusepi64_epi16 + // 
CHECK: @llvm.x86.avx512.mask.pmovus.qw.512 + return _mm512_cvtusepi64_epi16(__A); +} + +__m128i test_mm512_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtusepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.qw.512 + return _mm512_mask_cvtusepi64_epi16(__O, __M, __A); +} + +__m128i test_mm512_maskz_cvtusepi64_epi16(__mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_cvtusepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.qw.512 + return _mm512_maskz_cvtusepi64_epi16(__M, __A); +} +void test_mm512_mask_cvtusepi64_storeu_epi16(void *__P, __mmask8 __M, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_cvtusepi64_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.qw.mem.512 + return _mm512_mask_cvtusepi64_storeu_epi16(__P, __M, __A); +} Index: test/CodeGen/avx512vbmi-builtins.c =================================================================== --- test/CodeGen/avx512vbmi-builtins.c +++ test/CodeGen/avx512vbmi-builtins.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512vbmi -emit-llvm -o - -Werror | FileCheck %s +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vbmi -emit-llvm -o - -Werror | FileCheck %s // Don't include mm_malloc.h, it's system specific.
#define __MM_MALLOC_H @@ -25,7 +25,25 @@ __m512i test_mm512_maskz_permutex2var_epi8(__mmask64 __U, __m512i __A, __m512i __I, __m512i __B) { // CHECK-LABEL: @test_mm512_maskz_permutex2var_epi8 - // CHECK: @llvm.x86.avx512.mask.vpermt2var.qi.512 + // CHECK: @llvm.x86.avx512.maskz.vpermt2var.qi.512 return _mm512_maskz_permutex2var_epi8(__U, __A, __I, __B); } +__m512i test_mm512_permutexvar_epi8(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_permutexvar_epi8 + // CHECK: @llvm.x86.avx512.mask.permvar.qi.512 + return _mm512_permutexvar_epi8(__A, __B); +} + +__m512i test_mm512_maskz_permutexvar_epi8(__mmask64 __M, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_permutexvar_epi8 + // CHECK: @llvm.x86.avx512.mask.permvar.qi.512 + return _mm512_maskz_permutexvar_epi8(__M, __A, __B); +} + +__m512i test_mm512_mask_permutexvar_epi8(__m512i __W, __mmask64 __M, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_permutexvar_epi8 + // CHECK: @llvm.x86.avx512.mask.permvar.qi.512 + return _mm512_mask_permutexvar_epi8(__W, __M, __A, __B); +} + Index: test/CodeGen/avx512vbmivl-builtin.c =================================================================== --- test/CodeGen/avx512vbmivl-builtin.c +++ test/CodeGen/avx512vbmivl-builtin.c @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature avx512vbmi -target-feature avx512vl -target-feature avx2 -emit-llvm -o - -Werror | FileCheck %s +// RUN: %clang_cc1 %s -triple=x86_64-apple-darwin -target-feature +avx512bw -target-feature +avx512vbmi -target-feature +avx512vl -emit-llvm -o - -Werror | FileCheck %s // Don't include mm_malloc.h, it's system specific. 
#define __MM_MALLOC_H @@ -50,6 +50,43 @@ __m256i test_mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, __m256i __B) { // CHECK-LABEL: @test_mm256_maskz_permutex2var_epi8 - // CHECK: @llvm.x86.avx512.mask.vpermt2var.qi.256 + // CHECK: @llvm.x86.avx512.maskz.vpermt2var.qi.256 return _mm256_maskz_permutex2var_epi8(__U, __A, __I, __B); -} \ No newline at end of file +} + +__m128i test_mm_permutexvar_epi8(__m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm_permutexvar_epi8 + // CHECK: @llvm.x86.avx512.mask.permvar.qi.128 + return _mm_permutexvar_epi8(__A, __B); +} + +__m128i test_mm_maskz_permutexvar_epi8(__mmask16 __M, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm_maskz_permutexvar_epi8 + // CHECK: @llvm.x86.avx512.mask.permvar.qi.128 + return _mm_maskz_permutexvar_epi8(__M, __A, __B); +} + +__m128i test_mm_mask_permutexvar_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm_mask_permutexvar_epi8 + // CHECK: @llvm.x86.avx512.mask.permvar.qi.128 + return _mm_mask_permutexvar_epi8(__W, __M, __A, __B); +} + +__m256i test_mm256_permutexvar_epi8(__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_permutexvar_epi8 + // CHECK: @llvm.x86.avx512.mask.permvar.qi.256 + return _mm256_permutexvar_epi8(__A, __B); +} + +__m256i test_mm256_maskz_permutexvar_epi8(__mmask32 __M, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_permutexvar_epi8 + // CHECK: @llvm.x86.avx512.mask.permvar.qi.256 + return _mm256_maskz_permutexvar_epi8(__M, __A, __B); +} + +__m256i test_mm256_mask_permutexvar_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_permutexvar_epi8 + // CHECK: @llvm.x86.avx512.mask.permvar.qi.256 + return _mm256_mask_permutexvar_epi8(__W, __M, __A, __B); +} + Index: test/CodeGen/avx512vl-builtins.c =================================================================== --- test/CodeGen/avx512vl-builtins.c +++ test/CodeGen/avx512vl-builtins.c @@ 
-4032,30 +4032,6 @@ return _mm256_maskz_movedup_pd(__U, __A); } -__m128i test_mm_mask_set1_epi32(__m128i __O, __mmask8 __M) { - // CHECK-LABEL: @test_mm_mask_set1_epi32 - // CHECK: @llvm.x86.avx512.mask.pbroadcast.d.gpr.128 - return _mm_mask_set1_epi32(__O, __M, 5); -} - -__m128i test_mm_maskz_set1_epi32(__mmask8 __M) { - // CHECK-LABEL: @test_mm_maskz_set1_epi32 - // CHECK: @llvm.x86.avx512.mask.pbroadcast.d.gpr.128 - return _mm_maskz_set1_epi32(__M, 5); -} - -__m256i test_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M) { - // CHECK-LABEL: @test_mm256_mask_set1_epi32 - // CHECK: @llvm.x86.avx512.mask.pbroadcast.d.gpr.256 - return _mm256_mask_set1_epi32(__O, __M, 5); -} - -__m256i test_mm256_maskz_set1_epi32(__mmask8 __M) { - // CHECK-LABEL: @test_mm256_maskz_set1_epi32 - // CHECK: @llvm.x86.avx512.mask.pbroadcast.d.gpr.256 - return _mm256_maskz_set1_epi32(__M, 5); -} - __m128i test_mm_mask_set1_epi64(__m128i __O, __mmask8 __M, long long __A) { // CHECK-LABEL: @test_mm_mask_set1_epi64 // CHECK: @llvm.x86.avx512.mask.pbroadcast.q.gpr.128 @@ -4068,12 +4044,6 @@ return _mm_maskz_set1_epi64(__M, __A); } -__m256i test_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) { - // CHECK-LABEL: @test_mm256_mask_set1_epi64 - // CHECK: @llvm.x86.avx512.mask.pbroadcast.q.gpr.256 - return _mm256_mask_set1_epi64(__O, __M, __A); -} - __m256i test_mm256_maskz_set1_epi64(__mmask8 __M, long long __A) { // CHECK-LABEL: @test_mm256_maskz_set1_epi64 // CHECK: @llvm.x86.avx512.mask.pbroadcast.q.gpr.256 @@ -4296,3 +4266,1246 @@ return _mm256_maskz_loadu_ps(__U, __P); } +__m128i test_mm_alignr_epi32(__m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm_alignr_epi32 + // CHECK: @llvm.x86.avx512.mask.valign.d.128 + return _mm_alignr_epi32(__A, __B, 1); +} + +__m128i test_mm_mask_alignr_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm_mask_alignr_epi32 + // CHECK: @llvm.x86.avx512.mask.valign.d.128 + return _mm_mask_alignr_epi32(__W, __U, 
__A, __B, 1); +} + +__m128i test_mm_maskz_alignr_epi32(__mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm_maskz_alignr_epi32 + // CHECK: @llvm.x86.avx512.mask.valign.d.128 + return _mm_maskz_alignr_epi32(__U, __A, __B, 1); +} + +__m256i test_mm256_alignr_epi32(__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_alignr_epi32 + // CHECK: @llvm.x86.avx512.mask.valign.d.256 + return _mm256_alignr_epi32(__A, __B, 1); +} + +__m256i test_mm256_mask_alignr_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_alignr_epi32 + // CHECK: @llvm.x86.avx512.mask.valign.d.256 + return _mm256_mask_alignr_epi32(__W, __U, __A, __B, 1); +} + +__m256i test_mm256_maskz_alignr_epi32(__mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_alignr_epi32 + // CHECK: @llvm.x86.avx512.mask.valign.d.256 + return _mm256_maskz_alignr_epi32(__U, __A, __B, 1); +} + +__m128i test_mm_alignr_epi64(__m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm_alignr_epi64 + // CHECK: @llvm.x86.avx512.mask.valign.q.128 + return _mm_alignr_epi64(__A, __B, 1); +} + +__m128i test_mm_mask_alignr_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm_mask_alignr_epi64 + // CHECK: @llvm.x86.avx512.mask.valign.q.128 + return _mm_mask_alignr_epi64(__W, __U, __A, __B, 1); +} + +__m128i test_mm_maskz_alignr_epi64(__mmask8 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm_maskz_alignr_epi64 + // CHECK: @llvm.x86.avx512.mask.valign.q.128 + return _mm_maskz_alignr_epi64(__U, __A, __B, 1); +} + +__m256i test_mm256_alignr_epi64(__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_alignr_epi64 + // CHECK: @llvm.x86.avx512.mask.valign.q.256 + return _mm256_alignr_epi64(__A, __B, 1); +} + +__m256i test_mm256_mask_alignr_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_alignr_epi64 + // CHECK: @llvm.x86.avx512.mask.valign.q.256 + return 
_mm256_mask_alignr_epi64(__W, __U, __A, __B, 1); +} + +__m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_alignr_epi64 + // CHECK: @llvm.x86.avx512.mask.valign.q.256 + return _mm256_maskz_alignr_epi64(__U, __A, __B, 1); +} + +__m256 test_mm256_broadcast_f32x4(__m128 __A) { + // CHECK-LABEL: @test_mm256_broadcast_f32x4 + // CHECK: @llvm.x86.avx512.mask.broadcastf32x4 + return _mm256_broadcast_f32x4(__A); +} + +__m256 test_mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) { + // CHECK-LABEL: @test_mm256_mask_broadcast_f32x4 + // CHECK: @llvm.x86.avx512.mask.broadcastf32x4 + return _mm256_mask_broadcast_f32x4(__O, __M, __A); +} + +__m256 test_mm256_maskz_broadcast_f32x4(__mmask8 __M, __m128 __A) { + // CHECK-LABEL: @test_mm256_maskz_broadcast_f32x4 + // CHECK: @llvm.x86.avx512.mask.broadcastf32x4 + return _mm256_maskz_broadcast_f32x4(__M, __A); +} + +__m256i test_mm256_broadcast_i32x4(__m128i __A) { + // CHECK-LABEL: @test_mm256_broadcast_i32x4 + // CHECK: @llvm.x86.avx512.mask.broadcasti32x4 + return _mm256_broadcast_i32x4(__A); +} + +__m256i test_mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm256_mask_broadcast_i32x4 + // CHECK: @llvm.x86.avx512.mask.broadcasti32x4 + return _mm256_mask_broadcast_i32x4(__O, __M, __A); +} + +__m256i test_mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm256_maskz_broadcast_i32x4 + // CHECK: @llvm.x86.avx512.mask.broadcasti32x4 + return _mm256_maskz_broadcast_i32x4(__M, __A); +} + +__m256d test_mm256_mask_broadcastsd_pd(__m256d __O, __mmask8 __M, __m128d __A) { + // CHECK-LABEL: @test_mm256_mask_broadcastsd_pd + // CHECK: @llvm.x86.avx512.mask.broadcast.sd.pd.256 + return _mm256_mask_broadcastsd_pd(__O, __M, __A); +} + +__m256d test_mm256_maskz_broadcastsd_pd(__mmask8 __M, __m128d __A) { + // CHECK-LABEL: @test_mm256_maskz_broadcastsd_pd + // CHECK: 
@llvm.x86.avx512.mask.broadcast.sd.pd.256 + return _mm256_maskz_broadcastsd_pd(__M, __A); +} + +__m128 test_mm_mask_broadcastss_ps(__m128 __O, __mmask8 __M, __m128 __A) { + // CHECK-LABEL: @test_mm_mask_broadcastss_ps + // CHECK: @llvm.x86.avx512.mask.broadcast.ss.ps.128 + return _mm_mask_broadcastss_ps(__O, __M, __A); +} + +__m128 test_mm_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { + // CHECK-LABEL: @test_mm_maskz_broadcastss_ps + // CHECK: @llvm.x86.avx512.mask.broadcast.ss.ps.128 + return _mm_maskz_broadcastss_ps(__M, __A); +} + +__m256 test_mm256_mask_broadcastss_ps(__m256 __O, __mmask8 __M, __m128 __A) { + // CHECK-LABEL: @test_mm256_mask_broadcastss_ps + // CHECK: @llvm.x86.avx512.mask.broadcast.ss.ps.256 + return _mm256_mask_broadcastss_ps(__O, __M, __A); +} + +__m256 test_mm256_maskz_broadcastss_ps(__mmask8 __M, __m128 __A) { + // CHECK-LABEL: @test_mm256_maskz_broadcastss_ps + // CHECK: @llvm.x86.avx512.mask.broadcast.ss.ps.256 + return _mm256_maskz_broadcastss_ps(__M, __A); +} + +__m128 test_mm256_extractf32x4_ps(__m256 __A) { + // CHECK-LABEL: @test_mm256_extractf32x4_ps + // CHECK: @llvm.x86.avx512.mask.vextractf32x4 + return _mm256_extractf32x4_ps(__A, 1); +} + +__m128 test_mm256_mask_extractf32x4_ps(__m128 __W, __mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm256_mask_extractf32x4_ps + // CHECK: @llvm.x86.avx512.mask.vextractf32x4 + return _mm256_mask_extractf32x4_ps(__W, __U, __A, 1); +} + +__m128 test_mm256_maskz_extractf32x4_ps(__mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm256_maskz_extractf32x4_ps + // CHECK: @llvm.x86.avx512.mask.vextractf32x4 + return _mm256_maskz_extractf32x4_ps(__U, __A, 1); +} + +__m128i test_mm256_extracti32x4_epi32(__m256i __A) { + // CHECK-LABEL: @test_mm256_extracti32x4_epi32 + // CHECK: @llvm.x86.avx512.mask.vextracti32x4 + return _mm256_extracti32x4_epi32(__A, 1); +} + +__m128i test_mm256_mask_extracti32x4_epi32(__m128i __W, __mmask8 __U, __m256i __A) { + // CHECK-LABEL: 
@test_mm256_mask_extracti32x4_epi32 + // CHECK: @llvm.x86.avx512.mask.vextracti32x4 + return _mm256_mask_extracti32x4_epi32(__W, __U, __A, 1); +} + +__m128i test_mm256_maskz_extracti32x4_epi32(__mmask8 __U, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_extracti32x4_epi32 + // CHECK: @llvm.x86.avx512.mask.vextracti32x4 + return _mm256_maskz_extracti32x4_epi32(__U, __A, 1); +} + +__m128d test_mm_getmant_pd(__m128d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm_getmant_pd + // CHECK: @llvm.x86.avx512.mask.getmant.pd.128 + return _mm_getmant_pd(__A, 1, 2); +} + +__m128d test_mm_mask_getmant_pd(__m128d __W, __mmask8 __U, __m128d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm_mask_getmant_pd + // CHECK: @llvm.x86.avx512.mask.getmant.pd.128 + return _mm_mask_getmant_pd(__W, __U, __A, 1, 2); +} + +__m128d test_mm_maskz_getmant_pd(__mmask8 __U, __m128d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm_maskz_getmant_pd + // CHECK: @llvm.x86.avx512.mask.getmant.pd.128 + return _mm_maskz_getmant_pd(__U, __A, 1, 2); +} + +__m256d test_mm256_getmant_pd(__m256d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm256_getmant_pd + // CHECK: @llvm.x86.avx512.mask.getmant.pd.256 + return _mm256_getmant_pd(__A, 1, 2); +} + +__m256d test_mm256_mask_getmant_pd(__m256d __W, __mmask8 __U, __m256d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm256_mask_getmant_pd + // CHECK: @llvm.x86.avx512.mask.getmant.pd.256 + return _mm256_mask_getmant_pd(__W, __U, __A, 1, 2); +} + +__m256d test_mm256_maskz_getmant_pd(__mmask8 __U, __m256d __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm256_maskz_getmant_pd + // CHECK: @llvm.x86.avx512.mask.getmant.pd.256 + return _mm256_maskz_getmant_pd(__U, __A, 1, 2); +} + +__m128 test_mm_getmant_ps(__m128 __A, 
_MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm_getmant_ps + // CHECK: @llvm.x86.avx512.mask.getmant.ps.128 + return _mm_getmant_ps(__A, 1, 2); +} + +__m128 test_mm_mask_getmant_ps(__m128 __W, __mmask8 __U, __m128 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm_mask_getmant_ps + // CHECK: @llvm.x86.avx512.mask.getmant.ps.128 + return _mm_mask_getmant_ps(__W, __U, __A, 1, 2); +} + +__m128 test_mm_maskz_getmant_ps(__mmask8 __U, __m128 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm_maskz_getmant_ps + // CHECK: @llvm.x86.avx512.mask.getmant.ps.128 + return _mm_maskz_getmant_ps(__U, __A, 1, 2); +} + +__m256 test_mm256_getmant_ps(__m256 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm256_getmant_ps + // CHECK: @llvm.x86.avx512.mask.getmant.ps.256 + return _mm256_getmant_ps(__A, 1, 2); +} + +__m256 test_mm256_mask_getmant_ps(__m256 __W, __mmask8 __U, __m256 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm256_mask_getmant_ps + // CHECK: @llvm.x86.avx512.mask.getmant.ps.256 + return _mm256_mask_getmant_ps(__W, __U, __A, 1, 2); +} + +__m256 test_mm256_maskz_getmant_ps(__mmask8 __U, __m256 __A, _MM_MANTISSA_NORM_ENUM __B, _MM_MANTISSA_SIGN_ENUM __C) { + // CHECK-LABEL: @test_mm256_maskz_getmant_ps + // CHECK: @llvm.x86.avx512.mask.getmant.ps.256 + return _mm256_maskz_getmant_ps(__U, __A, 1, 2); +} + +__m256 test_mm256_insertf32x4(__m256 __A, __m128 __B) { + // CHECK-LABEL: @test_mm256_insertf32x4 + // CHECK: @llvm.x86.avx512.mask.insertf32x4 + return _mm256_insertf32x4(__A, __B, 1); +} + +__m256 test_mm256_mask_insertf32x4(__m256 __W, __mmask8 __U, __m256 __A, __m128 __B) { + // CHECK-LABEL: @test_mm256_mask_insertf32x4 + // CHECK: @llvm.x86.avx512.mask.insertf32x4 + return _mm256_mask_insertf32x4(__W, __U, __A, __B, 1); +} + +__m256 test_mm256_maskz_insertf32x4(__mmask8 __U, 
__m256 __A, __m128 __B) { + // CHECK-LABEL: @test_mm256_maskz_insertf32x4 + // CHECK: @llvm.x86.avx512.mask.insertf32x4 + return _mm256_maskz_insertf32x4(__U, __A, __B, 1); +} + +__m256i test_mm256_inserti32x4(__m256i __A, __m128i __B) { + // CHECK-LABEL: @test_mm256_inserti32x4 + // CHECK: @llvm.x86.avx512.mask.inserti32x4 + return _mm256_inserti32x4(__A, __B, 1); +} + +__m256i test_mm256_mask_inserti32x4(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { + // CHECK-LABEL: @test_mm256_mask_inserti32x4 + // CHECK: @llvm.x86.avx512.mask.inserti32x4 + return _mm256_mask_inserti32x4(__W, __U, __A, __B, 1); +} + +__m256i test_mm256_maskz_inserti32x4(__mmask8 __U, __m256i __A, __m128i __B) { + // CHECK-LABEL: @test_mm256_maskz_inserti32x4 + // CHECK: @llvm.x86.avx512.mask.inserti32x4 + return _mm256_maskz_inserti32x4(__U, __A, __B, 1); +} + +__m128 test_mm_mask_movehdup_ps(__m128 __W, __mmask8 __U, __m128 __A) { + // CHECK-LABEL: @test_mm_mask_movehdup_ps + // CHECK: @llvm.x86.avx512.mask.movshdup.128 + return _mm_mask_movehdup_ps(__W, __U, __A); +} + +__m128 test_mm_maskz_movehdup_ps(__mmask8 __U, __m128 __A) { + // CHECK-LABEL: @test_mm_maskz_movehdup_ps + // CHECK: @llvm.x86.avx512.mask.movshdup.128 + return _mm_maskz_movehdup_ps(__U, __A); +} + +__m256 test_mm256_mask_movehdup_ps(__m256 __W, __mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm256_mask_movehdup_ps + // CHECK: @llvm.x86.avx512.mask.movshdup.256 + return _mm256_mask_movehdup_ps(__W, __U, __A); +} + +__m256 test_mm256_maskz_movehdup_ps(__mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm256_maskz_movehdup_ps + // CHECK: @llvm.x86.avx512.mask.movshdup.256 + return _mm256_maskz_movehdup_ps(__U, __A); +} + +__m128 test_mm_mask_moveldup_ps(__m128 __W, __mmask8 __U, __m128 __A) { + // CHECK-LABEL: @test_mm_mask_moveldup_ps + // CHECK: @llvm.x86.avx512.mask.movsldup.128 + return _mm_mask_moveldup_ps(__W, __U, __A); +} + +__m128 test_mm_maskz_moveldup_ps(__mmask8 __U, __m128 __A) { + // CHECK-LABEL: 
@test_mm_maskz_moveldup_ps + // CHECK: @llvm.x86.avx512.mask.movsldup.128 + return _mm_maskz_moveldup_ps(__U, __A); +} + +__m256 test_mm256_mask_moveldup_ps(__m256 __W, __mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm256_mask_moveldup_ps + // CHECK: @llvm.x86.avx512.mask.movsldup.256 + return _mm256_mask_moveldup_ps(__W, __U, __A); +} + +__m256 test_mm256_maskz_moveldup_ps(__mmask8 __U, __m256 __A) { + // CHECK-LABEL: @test_mm256_maskz_moveldup_ps + // CHECK: @llvm.x86.avx512.mask.movsldup.256 + return _mm256_maskz_moveldup_ps(__U, __A); +} + +__m128i test_mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) { + // CHECK-LABEL: @test_mm_mask_set1_epi32 + // CHECK: @llvm.x86.avx512.mask.pbroadcast.d.gpr.128 + return _mm_mask_set1_epi32(__O, __M, __A); +} + +__m128i test_mm_maskz_set1_epi32(__mmask8 __M, int __A) { + // CHECK-LABEL: @test_mm_maskz_set1_epi32 + // CHECK: @llvm.x86.avx512.mask.pbroadcast.d.gpr.128 + return _mm_maskz_set1_epi32(__M, __A); +} + +__m128i test_mm_mask_broadcastd_epi32(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_broadcastd_epi32 + // CHECK: @llvm.x86.avx512.pbroadcastd.128 + return _mm_mask_broadcastd_epi32(__O, __M, __A); +} + +__m128i test_mm_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_broadcastd_epi32 + // CHECK: @llvm.x86.avx512.pbroadcastd.128 + return _mm_maskz_broadcastd_epi32(__M, __A); +} + +__m256i test_mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) { + // CHECK-LABEL: @test_mm256_mask_set1_epi32 + // CHECK: @llvm.x86.avx512.mask.pbroadcast.d.gpr.256 + return _mm256_mask_set1_epi32(__O, __M, __A); +} + +__m256i test_mm256_maskz_set1_epi32(__mmask8 __M, int __A) { + // CHECK-LABEL: @test_mm256_maskz_set1_epi32 + // CHECK: @llvm.x86.avx512.mask.pbroadcast.d.gpr.256 + return _mm256_maskz_set1_epi32(__M, __A); +} + +__m256i test_mm256_mask_broadcastd_epi32(__m256i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: 
@test_mm256_mask_broadcastd_epi32 + // CHECK: @llvm.x86.avx512.pbroadcastd.256 + return _mm256_mask_broadcastd_epi32(__O, __M, __A); +} + +__m256i test_mm256_maskz_broadcastd_epi32(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm256_maskz_broadcastd_epi32 + // CHECK: @llvm.x86.avx512.pbroadcastd.256 + return _mm256_maskz_broadcastd_epi32(__M, __A); +} + +__m128i test_mm_mask_broadcastq_epi64(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_broadcastq_epi64 + // CHECK: @llvm.x86.avx512.pbroadcastq.128 + return _mm_mask_broadcastq_epi64(__O, __M, __A); +} + +__m128i test_mm_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_broadcastq_epi64 + // CHECK: @llvm.x86.avx512.pbroadcastq.128 + return _mm_maskz_broadcastq_epi64(__M, __A); +} + +__m256i test_mm256_mask_set1_epi64(__m256i __O, __mmask8 __M, long long __A) { + // CHECK-LABEL: @test_mm256_mask_set1_epi64 + // CHECK: @llvm.x86.avx512.mask.pbroadcast.q.gpr.256 + return _mm256_mask_set1_epi64(__O, __M, __A); +} + +__m256i test_mm256_mask_broadcastq_epi64(__m256i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm256_mask_broadcastq_epi64 + // CHECK: @llvm.x86.avx512.pbroadcastq.256 + return _mm256_mask_broadcastq_epi64(__O, __M, __A); +} + +__m256i test_mm256_maskz_broadcastq_epi64(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm256_maskz_broadcastq_epi64 + // CHECK: @llvm.x86.avx512.pbroadcastq.256 + return _mm256_maskz_broadcastq_epi64(__M, __A); +} + +__m256d test_mm256_mask_permutex_pd(__m256d __W, __mmask8 __U, __m256d __X) { + // CHECK-LABEL: @test_mm256_mask_permutex_pd + // CHECK: @llvm.x86.avx512.mask.perm.df.256 + return _mm256_mask_permutex_pd(__W, __U, __X, 1); +} + +__m256d test_mm256_maskz_permutex_pd(__mmask8 __U, __m256d __X) { + // CHECK-LABEL: @test_mm256_maskz_permutex_pd + // CHECK: @llvm.x86.avx512.mask.perm.df.256 + return _mm256_maskz_permutex_pd(__U, __X, 1); +} + +__m256d test_mm256_permutex_pd(__m256d __X) { 
+ // CHECK-LABEL: @test_mm256_permutex_pd + // CHECK: @llvm.x86.avx512.mask.perm.df.256 + return _mm256_permutex_pd(__X, 3); +} + +__m256i test_mm256_mask_permutex_epi64(__m256i __W, __mmask8 __M, __m256i __X) { + // CHECK-LABEL: @test_mm256_mask_permutex_epi64 + // CHECK: @llvm.x86.avx512.mask.perm.di.256 + return _mm256_mask_permutex_epi64(__W, __M, __X, 3); +} + +__m256i test_mm256_maskz_permutex_epi64(__mmask8 __M, __m256i __X) { + // CHECK-LABEL: @test_mm256_maskz_permutex_epi64 + // CHECK: @llvm.x86.avx512.mask.perm.di.256 + return _mm256_maskz_permutex_epi64(__M, __X, 3); +} + +__m256d test_mm256_permutexvar_pd(__m256i __X, __m256d __Y) { + // CHECK-LABEL: @test_mm256_permutexvar_pd + // CHECK: @llvm.x86.avx512.mask.permvar.df.256 + return _mm256_permutexvar_pd(__X, __Y); +} + +__m256d test_mm256_mask_permutexvar_pd(__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y) { + // CHECK-LABEL: @test_mm256_mask_permutexvar_pd + // CHECK: @llvm.x86.avx512.mask.permvar.df.256 + return _mm256_mask_permutexvar_pd(__W, __U, __X, __Y); +} + +__m256d test_mm256_maskz_permutexvar_pd(__mmask8 __U, __m256i __X, __m256d __Y) { + // CHECK-LABEL: @test_mm256_maskz_permutexvar_pd + // CHECK: @llvm.x86.avx512.mask.permvar.df.256 + return _mm256_maskz_permutexvar_pd(__U, __X, __Y); +} + +__m256i test_mm256_maskz_permutexvar_epi64(__mmask8 __M, __m256i __X, __m256i __Y) { + // CHECK-LABEL: @test_mm256_maskz_permutexvar_epi64 + // CHECK: @llvm.x86.avx512.mask.permvar.di.256 + return _mm256_maskz_permutexvar_epi64(__M, __X, __Y); +} + +__m256i test_mm256_mask_permutexvar_epi64(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { + // CHECK-LABEL: @test_mm256_mask_permutexvar_epi64 + // CHECK: @llvm.x86.avx512.mask.permvar.di.256 + return _mm256_mask_permutexvar_epi64(__W, __M, __X, __Y); +} + +__m256 test_mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { + // CHECK-LABEL: @test_mm256_mask_permutexvar_ps + // CHECK: @llvm.x86.avx512.mask.permvar.sf.256 + 
return _mm256_mask_permutexvar_ps(__W, __U, __X, __Y); +} + +__m256 test_mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) { + // CHECK-LABEL: @test_mm256_maskz_permutexvar_ps + // CHECK: @llvm.x86.avx512.mask.permvar.sf.256 + return _mm256_maskz_permutexvar_ps(__U, __X, __Y); +} + +__m256i test_mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { + // CHECK-LABEL: @test_mm256_maskz_permutexvar_epi32 + // CHECK: @llvm.x86.avx512.mask.permvar.si.256 + return _mm256_maskz_permutexvar_epi32(__M, __X, __Y); +} + +__m256i test_mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { + // CHECK-LABEL: @test_mm256_mask_permutexvar_epi32 + // CHECK: @llvm.x86.avx512.mask.permvar.si.256 + return _mm256_mask_permutexvar_epi32(__W, __M, __X, __Y); +} + +__m128i test_mm_cvtepi32_epi8(__m128i __A) { + // CHECK-LABEL: @test_mm_cvtepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.db.128 + return _mm_cvtepi32_epi8(__A); +} + +__m128i test_mm_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.db.128 + return _mm_mask_cvtepi32_epi8(__O, __M, __A); +} + +__m128i test_mm_maskz_cvtepi32_epi8(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_cvtepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.db.128 + return _mm_maskz_cvtepi32_epi8(__M, __A); +} + +void test_mm_mask_cvtepi32_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtepi32_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.db.mem.128 + return _mm_mask_cvtepi32_storeu_epi8(__P, __M, __A); +} + +__m128i test_mm256_cvtepi32_epi8(__m256i __A) { + // CHECK-LABEL: @test_mm256_cvtepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.db.256 + return _mm256_cvtepi32_epi8(__A); +} + +__m128i test_mm256_mask_cvtepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtepi32_epi8 + // CHECK: 
@llvm.x86.avx512.mask.pmov.db.256 + return _mm256_mask_cvtepi32_epi8(__O, __M, __A); +} + +__m128i test_mm256_maskz_cvtepi32_epi8(__mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_cvtepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.db.256 + return _mm256_maskz_cvtepi32_epi8(__M, __A); +} + +void test_mm256_mask_cvtepi32_storeu_epi8(void * __P, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtepi32_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.db.mem.256 + return _mm256_mask_cvtepi32_storeu_epi8(__P, __M, __A); +} + +__m128i test_mm_cvtepi32_epi16(__m128i __A) { + // CHECK-LABEL: @test_mm_cvtepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.dw.128 + return _mm_cvtepi32_epi16(__A); +} + +__m128i test_mm_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.dw.128 + return _mm_mask_cvtepi32_epi16(__O, __M, __A); +} + +__m128i test_mm_maskz_cvtepi32_epi16(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_cvtepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.dw.128 + return _mm_maskz_cvtepi32_epi16(__M, __A); +} + +void test_mm_mask_cvtepi32_storeu_epi16(void * __P, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtepi32_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.dw.mem.128 + return _mm_mask_cvtepi32_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm256_cvtepi32_epi16(__m256i __A) { + // CHECK-LABEL: @test_mm256_cvtepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.dw.256 + return _mm256_cvtepi32_epi16(__A); +} + +__m128i test_mm256_mask_cvtepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.dw.256 + return _mm256_mask_cvtepi32_epi16(__O, __M, __A); +} + +__m128i test_mm256_maskz_cvtepi32_epi16(__mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_cvtepi32_epi16 + // CHECK: 
@llvm.x86.avx512.mask.pmov.dw.256 + return _mm256_maskz_cvtepi32_epi16(__M, __A); +} + +void test_mm256_mask_cvtepi32_storeu_epi16(void * __P, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtepi32_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.dw.mem.256 + return _mm256_mask_cvtepi32_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm_cvtepi64_epi8(__m128i __A) { + // CHECK-LABEL: @test_mm_cvtepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.qb.128 + return _mm_cvtepi64_epi8(__A); +} + +__m128i test_mm_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.qb.128 + return _mm_mask_cvtepi64_epi8(__O, __M, __A); +} + +__m128i test_mm_maskz_cvtepi64_epi8(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_cvtepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.qb.128 + return _mm_maskz_cvtepi64_epi8(__M, __A); +} + +void test_mm_mask_cvtepi64_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtepi64_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.qb.mem.128 + return _mm_mask_cvtepi64_storeu_epi8(__P, __M, __A); +} + +__m128i test_mm256_cvtepi64_epi8(__m256i __A) { + // CHECK-LABEL: @test_mm256_cvtepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.qb.256 + return _mm256_cvtepi64_epi8(__A); +} + +__m128i test_mm256_mask_cvtepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.qb.256 + return _mm256_mask_cvtepi64_epi8(__O, __M, __A); +} + +__m128i test_mm256_maskz_cvtepi64_epi8(__mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_cvtepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmov.qb.256 + return _mm256_maskz_cvtepi64_epi8(__M, __A); +} + +void test_mm256_mask_cvtepi64_storeu_epi8(void * __P, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtepi64_storeu_epi8 + // CHECK: 
@llvm.x86.avx512.mask.pmov.qb.mem.256 + return _mm256_mask_cvtepi64_storeu_epi8(__P, __M, __A); +} + +__m128i test_mm_cvtepi64_epi32(__m128i __A) { + // CHECK-LABEL: @test_mm_cvtepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmov.qd.128 + return _mm_cvtepi64_epi32(__A); +} + +__m128i test_mm_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmov.qd.128 + return _mm_mask_cvtepi64_epi32(__O, __M, __A); +} + +__m128i test_mm_maskz_cvtepi64_epi32(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_cvtepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmov.qd.128 + return _mm_maskz_cvtepi64_epi32(__M, __A); +} + +void test_mm_mask_cvtepi64_storeu_epi32(void * __P, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtepi64_storeu_epi32 + // CHECK: @llvm.x86.avx512.mask.pmov.qd.mem.128 + return _mm_mask_cvtepi64_storeu_epi32(__P, __M, __A); +} + +__m128i test_mm256_cvtepi64_epi32(__m256i __A) { + // CHECK-LABEL: @test_mm256_cvtepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmov.qd.256 + return _mm256_cvtepi64_epi32(__A); +} + +__m128i test_mm256_mask_cvtepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmov.qd.256 + return _mm256_mask_cvtepi64_epi32(__O, __M, __A); +} + +__m128i test_mm256_maskz_cvtepi64_epi32(__mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_cvtepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmov.qd.256 + return _mm256_maskz_cvtepi64_epi32(__M, __A); +} + +void test_mm256_mask_cvtepi64_storeu_epi32(void * __P, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtepi64_storeu_epi32 + // CHECK: @llvm.x86.avx512.mask.pmov.qd.mem.256 + return _mm256_mask_cvtepi64_storeu_epi32(__P, __M, __A); +} + +__m128i test_mm_cvtepi64_epi16(__m128i __A) { + // CHECK-LABEL: @test_mm_cvtepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.qw.128 + return 
_mm_cvtepi64_epi16(__A); +} + +__m128i test_mm_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.qw.128 + return _mm_mask_cvtepi64_epi16(__O, __M, __A); +} + +__m128i test_mm_maskz_cvtepi64_epi16(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_cvtepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.qw.128 + return _mm_maskz_cvtepi64_epi16(__M, __A); +} + +void test_mm_mask_cvtepi64_storeu_epi16(void * __P, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtepi64_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.qw.mem.128 + return _mm_mask_cvtepi64_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm256_cvtepi64_epi16(__m256i __A) { + // CHECK-LABEL: @test_mm256_cvtepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.qw.256 + return _mm256_cvtepi64_epi16(__A); +} + +__m128i test_mm256_mask_cvtepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.qw.256 + return _mm256_mask_cvtepi64_epi16(__O, __M, __A); +} + +__m128i test_mm256_maskz_cvtepi64_epi16(__mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_cvtepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.qw.256 + return _mm256_maskz_cvtepi64_epi16(__M, __A); +} + +void test_mm256_mask_cvtepi64_storeu_epi16(void * __P, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtepi64_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmov.qw.mem.256 + return _mm256_mask_cvtepi64_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm_cvtsepi32_epi8(__m128i __A) { + // CHECK-LABEL: @test_mm_cvtsepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.db.128 + return _mm_cvtsepi32_epi8(__A); +} + +__m128i test_mm_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtsepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.db.128 + return _mm_mask_cvtsepi32_epi8(__O, __M, 
__A); +} + +__m128i test_mm_maskz_cvtsepi32_epi8(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_cvtsepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.db.128 + return _mm_maskz_cvtsepi32_epi8(__M, __A); +} + +void test_mm_mask_cvtsepi32_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtsepi32_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.db.mem.128 + return _mm_mask_cvtsepi32_storeu_epi8(__P, __M, __A); +} + +__m128i test_mm256_cvtsepi32_epi8(__m256i __A) { + // CHECK-LABEL: @test_mm256_cvtsepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.db.256 + return _mm256_cvtsepi32_epi8(__A); +} + +__m128i test_mm256_mask_cvtsepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtsepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.db.256 + return _mm256_mask_cvtsepi32_epi8(__O, __M, __A); +} + +__m128i test_mm256_maskz_cvtsepi32_epi8(__mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_cvtsepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.db.256 + return _mm256_maskz_cvtsepi32_epi8(__M, __A); +} + +void test_mm256_mask_cvtsepi32_storeu_epi8(void * __P, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtsepi32_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.db.mem.256 + return _mm256_mask_cvtsepi32_storeu_epi8(__P, __M, __A); +} + +__m128i test_mm_cvtsepi32_epi16(__m128i __A) { + // CHECK-LABEL: @test_mm_cvtsepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.dw.128 + return _mm_cvtsepi32_epi16(__A); +} + +__m128i test_mm_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtsepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.dw.128 + return _mm_mask_cvtsepi32_epi16(__O, __M, __A); +} + +__m128i test_mm_maskz_cvtsepi32_epi16(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_cvtsepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.dw.128 + return _mm_maskz_cvtsepi32_epi16(__M, __A); +} + +void 
test_mm_mask_cvtsepi32_storeu_epi16(void * __P, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtsepi32_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.dw.mem.128 + return _mm_mask_cvtsepi32_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm256_cvtsepi32_epi16(__m256i __A) { + // CHECK-LABEL: @test_mm256_cvtsepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.dw.256 + return _mm256_cvtsepi32_epi16(__A); +} + +__m128i test_mm256_mask_cvtsepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtsepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.dw.256 + return _mm256_mask_cvtsepi32_epi16(__O, __M, __A); +} + +__m128i test_mm256_maskz_cvtsepi32_epi16(__mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_cvtsepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.dw.256 + return _mm256_maskz_cvtsepi32_epi16(__M, __A); +} + +void test_mm256_mask_cvtsepi32_storeu_epi16(void * __P, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtsepi32_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.dw.mem.256 + return _mm256_mask_cvtsepi32_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm_cvtsepi64_epi8(__m128i __A) { + // CHECK-LABEL: @test_mm_cvtsepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.qb.128 + return _mm_cvtsepi64_epi8(__A); +} + +__m128i test_mm_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtsepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.qb.128 + return _mm_mask_cvtsepi64_epi8(__O, __M, __A); +} + +__m128i test_mm_maskz_cvtsepi64_epi8(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_cvtsepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.qb.128 + return _mm_maskz_cvtsepi64_epi8(__M, __A); +} + +void test_mm_mask_cvtsepi64_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtsepi64_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.qb.mem.128 + return 
_mm_mask_cvtsepi64_storeu_epi8(__P, __M, __A); +} + +__m128i test_mm256_cvtsepi64_epi8(__m256i __A) { + // CHECK-LABEL: @test_mm256_cvtsepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.qb.256 + return _mm256_cvtsepi64_epi8(__A); +} + +__m128i test_mm256_mask_cvtsepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtsepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.qb.256 + return _mm256_mask_cvtsepi64_epi8(__O, __M, __A); +} + +__m128i test_mm256_maskz_cvtsepi64_epi8(__mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_cvtsepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.qb.256 + return _mm256_maskz_cvtsepi64_epi8(__M, __A); +} + +void test_mm256_mask_cvtsepi64_storeu_epi8(void * __P, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtsepi64_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovs.qb.mem.256 + return _mm256_mask_cvtsepi64_storeu_epi8(__P, __M, __A); +} + +__m128i test_mm_cvtsepi64_epi32(__m128i __A) { + // CHECK-LABEL: @test_mm_cvtsepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovs.qd.128 + return _mm_cvtsepi64_epi32(__A); +} + +__m128i test_mm_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtsepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovs.qd.128 + return _mm_mask_cvtsepi64_epi32(__O, __M, __A); +} + +__m128i test_mm_maskz_cvtsepi64_epi32(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_cvtsepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovs.qd.128 + return _mm_maskz_cvtsepi64_epi32(__M, __A); +} + +void test_mm_mask_cvtsepi64_storeu_epi32(void * __P, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtsepi64_storeu_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovs.qd.mem.128 + return _mm_mask_cvtsepi64_storeu_epi32(__P, __M, __A); +} + +__m128i test_mm256_cvtsepi64_epi32(__m256i __A) { + // CHECK-LABEL: @test_mm256_cvtsepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovs.qd.256 + return 
_mm256_cvtsepi64_epi32(__A); +} + +__m128i test_mm256_mask_cvtsepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtsepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovs.qd.256 + return _mm256_mask_cvtsepi64_epi32(__O, __M, __A); +} + +__m128i test_mm256_maskz_cvtsepi64_epi32(__mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_cvtsepi64_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovs.qd.256 + return _mm256_maskz_cvtsepi64_epi32(__M, __A); +} + +void test_mm256_mask_cvtsepi64_storeu_epi32(void * __P, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtsepi64_storeu_epi32 + // CHECK: @llvm.x86.avx512.mask.pmovs.qd.mem.256 + return _mm256_mask_cvtsepi64_storeu_epi32(__P, __M, __A); +} + +__m128i test_mm_cvtsepi64_epi16(__m128i __A) { + // CHECK-LABEL: @test_mm_cvtsepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.qw.128 + return _mm_cvtsepi64_epi16(__A); +} + +__m128i test_mm_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtsepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.qw.128 + return _mm_mask_cvtsepi64_epi16(__O, __M, __A); +} + +__m128i test_mm_maskz_cvtsepi64_epi16(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_cvtsepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.qw.128 + return _mm_maskz_cvtsepi64_epi16(__M, __A); +} + +void test_mm_mask_cvtsepi64_storeu_epi16(void * __P, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtsepi64_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.qw.mem.128 + return _mm_mask_cvtsepi64_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm256_cvtsepi64_epi16(__m256i __A) { + // CHECK-LABEL: @test_mm256_cvtsepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.qw.256 + return _mm256_cvtsepi64_epi16(__A); +} + +__m128i test_mm256_mask_cvtsepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtsepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.qw.256 
+ return _mm256_mask_cvtsepi64_epi16(__O, __M, __A); +} + +__m128i test_mm256_maskz_cvtsepi64_epi16(__mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_cvtsepi64_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.qw.256 + return _mm256_maskz_cvtsepi64_epi16(__M, __A); +} + +void test_mm256_mask_cvtsepi64_storeu_epi16(void * __P, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtsepi64_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovs.qw.mem.256 + return _mm256_mask_cvtsepi64_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm_cvtusepi32_epi8(__m128i __A) { + // CHECK-LABEL: @test_mm_cvtusepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.db.128 + return _mm_cvtusepi32_epi8(__A); +} + +__m128i test_mm_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtusepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.db.128 + return _mm_mask_cvtusepi32_epi8(__O, __M, __A); +} + +__m128i test_mm_maskz_cvtusepi32_epi8(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_cvtusepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.db.128 + return _mm_maskz_cvtusepi32_epi8(__M, __A); +} + +void test_mm_mask_cvtusepi32_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtusepi32_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.db.mem.128 + return _mm_mask_cvtusepi32_storeu_epi8(__P, __M, __A); +} + +__m128i test_mm256_cvtusepi32_epi8(__m256i __A) { + // CHECK-LABEL: @test_mm256_cvtusepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.db.256 + return _mm256_cvtusepi32_epi8(__A); +} + +__m128i test_mm256_mask_cvtusepi32_epi8(__m128i __O, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtusepi32_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.db.256 + return _mm256_mask_cvtusepi32_epi8(__O, __M, __A); +} + +__m128i test_mm256_maskz_cvtusepi32_epi8(__mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_cvtusepi32_epi8 + // CHECK: 
@llvm.x86.avx512.mask.pmovus.db.256 + return _mm256_maskz_cvtusepi32_epi8(__M, __A); +} + +void test_mm256_mask_cvtusepi32_storeu_epi8(void * __P, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtusepi32_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.db.mem.256 + return _mm256_mask_cvtusepi32_storeu_epi8(__P, __M, __A); +} + +__m128i test_mm_cvtusepi32_epi16(__m128i __A) { + // CHECK-LABEL: @test_mm_cvtusepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.dw.128 + return _mm_cvtusepi32_epi16(__A); +} + +__m128i test_mm_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtusepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.dw.128 + return _mm_mask_cvtusepi32_epi16(__O, __M, __A); +} + +__m128i test_mm_maskz_cvtusepi32_epi16(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_cvtusepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.dw.128 + return _mm_maskz_cvtusepi32_epi16(__M, __A); +} + +void test_mm_mask_cvtusepi32_storeu_epi16(void * __P, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtusepi32_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.dw.mem.128 + return _mm_mask_cvtusepi32_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm256_cvtusepi32_epi16(__m256i __A) { + // CHECK-LABEL: @test_mm256_cvtusepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.dw.256 + return _mm256_cvtusepi32_epi16(__A); +} + +__m128i test_mm256_mask_cvtusepi32_epi16(__m128i __O, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtusepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.dw.256 + return _mm256_mask_cvtusepi32_epi16(__O, __M, __A); +} + +__m128i test_mm256_maskz_cvtusepi32_epi16(__mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_cvtusepi32_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.dw.256 + return _mm256_maskz_cvtusepi32_epi16(__M, __A); +} + +void test_mm256_mask_cvtusepi32_storeu_epi16(void * __P, __mmask8 __M, __m256i __A) { + // 
CHECK-LABEL: @test_mm256_mask_cvtusepi32_storeu_epi16 + // CHECK: @llvm.x86.avx512.mask.pmovus.dw.mem.256 + return _mm256_mask_cvtusepi32_storeu_epi16(__P, __M, __A); +} + +__m128i test_mm_cvtusepi64_epi8(__m128i __A) { + // CHECK-LABEL: @test_mm_cvtusepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.qb.128 + return _mm_cvtusepi64_epi8(__A); +} + +__m128i test_mm_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtusepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.qb.128 + return _mm_mask_cvtusepi64_epi8(__O, __M, __A); +} + +__m128i test_mm_maskz_cvtusepi64_epi8(__mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_maskz_cvtusepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.qb.128 + return _mm_maskz_cvtusepi64_epi8(__M, __A); +} + +void test_mm_mask_cvtusepi64_storeu_epi8(void * __P, __mmask8 __M, __m128i __A) { + // CHECK-LABEL: @test_mm_mask_cvtusepi64_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.qb.mem.128 + return _mm_mask_cvtusepi64_storeu_epi8(__P, __M, __A); +} + +__m128i test_mm256_cvtusepi64_epi8(__m256i __A) { + // CHECK-LABEL: @test_mm256_cvtusepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.qb.256 + return _mm256_cvtusepi64_epi8(__A); +} + +__m128i test_mm256_mask_cvtusepi64_epi8(__m128i __O, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtusepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.qb.256 + return _mm256_mask_cvtusepi64_epi8(__O, __M, __A); +} + +__m128i test_mm256_maskz_cvtusepi64_epi8(__mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_maskz_cvtusepi64_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.qb.256 + return _mm256_maskz_cvtusepi64_epi8(__M, __A); +} + +void test_mm256_mask_cvtusepi64_storeu_epi8(void * __P, __mmask8 __M, __m256i __A) { + // CHECK-LABEL: @test_mm256_mask_cvtusepi64_storeu_epi8 + // CHECK: @llvm.x86.avx512.mask.pmovus.qb.mem.256 + return _mm256_mask_cvtusepi64_storeu_epi8(__P, __M, __A); +} + +__m128i 
test_mm_cvtusepi64_epi32(__m128i __A) {
+  // CHECK-LABEL: @test_mm_cvtusepi64_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qd.128
+  return _mm_cvtusepi64_epi32(__A);
+}
+
+__m128i test_mm_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtusepi64_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qd.128
+  return _mm_mask_cvtusepi64_epi32(__O, __M, __A);
+}
+
+__m128i test_mm_maskz_cvtusepi64_epi32(__mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtusepi64_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qd.128
+  return _mm_maskz_cvtusepi64_epi32(__M, __A);
+}
+
+void test_mm_mask_cvtusepi64_storeu_epi32(void * __P, __mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtusepi64_storeu_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qd.mem.128
+  _mm_mask_cvtusepi64_storeu_epi32(__P, __M, __A); // void store: plain call, no value to return
+}
+
+__m128i test_mm256_cvtusepi64_epi32(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_cvtusepi64_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qd.256
+  return _mm256_cvtusepi64_epi32(__A);
+}
+
+__m128i test_mm256_mask_cvtusepi64_epi32(__m128i __O, __mmask8 __M, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtusepi64_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qd.256
+  return _mm256_mask_cvtusepi64_epi32(__O, __M, __A);
+}
+
+__m128i test_mm256_maskz_cvtusepi64_epi32(__mmask8 __M, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtusepi64_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qd.256
+  return _mm256_maskz_cvtusepi64_epi32(__M, __A);
+}
+
+void test_mm256_mask_cvtusepi64_storeu_epi32(void * __P, __mmask8 __M, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtusepi64_storeu_epi32
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qd.mem.256
+  _mm256_mask_cvtusepi64_storeu_epi32(__P, __M, __A); // void store: plain call, no value to return
+}
+
+__m128i test_mm_cvtusepi64_epi16(__m128i __A) {
+  // CHECK-LABEL: @test_mm_cvtusepi64_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qw.128
+  return _mm_cvtusepi64_epi16(__A);
+}
+
+__m128i test_mm_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtusepi64_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qw.128
+  return _mm_mask_cvtusepi64_epi16(__O, __M, __A);
+}
+
+__m128i test_mm_maskz_cvtusepi64_epi16(__mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_cvtusepi64_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qw.128
+  return _mm_maskz_cvtusepi64_epi16(__M, __A);
+}
+
+void test_mm_mask_cvtusepi64_storeu_epi16(void * __P, __mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_cvtusepi64_storeu_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qw.mem.128
+  _mm_mask_cvtusepi64_storeu_epi16(__P, __M, __A); // void store: plain call, no value to return
+}
+
+__m128i test_mm256_cvtusepi64_epi16(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_cvtusepi64_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qw.256
+  return _mm256_cvtusepi64_epi16(__A);
+}
+
+__m128i test_mm256_mask_cvtusepi64_epi16(__m128i __O, __mmask8 __M, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtusepi64_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qw.256
+  return _mm256_mask_cvtusepi64_epi16(__O, __M, __A);
+}
+
+__m128i test_mm256_maskz_cvtusepi64_epi16(__mmask8 __M, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_cvtusepi64_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qw.256
+  return _mm256_maskz_cvtusepi64_epi16(__M, __A);
+}
+
+void test_mm256_mask_cvtusepi64_storeu_epi16(void * __P, __mmask8 __M, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_cvtusepi64_storeu_epi16
+  // CHECK: @llvm.x86.avx512.mask.pmovus.qw.mem.256
+  _mm256_mask_cvtusepi64_storeu_epi16(__P, __M, __A); // void store: plain call, no value to return
+}
+
+__m128i test_mm_mask_shuffle_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_shuffle_epi32
+  // CHECK: @llvm.x86.avx512.mask.pshuf.d.128
+  return _mm_mask_shuffle_epi32(__W, __U, __A, 1);
+}
+
+__m128i test_mm_maskz_shuffle_epi32(__mmask8 __U, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_shuffle_epi32
+  // CHECK: @llvm.x86.avx512.mask.pshuf.d.128
+  return _mm_maskz_shuffle_epi32(__U, __A, 2);
+}
+
+__m256i test_mm256_mask_shuffle_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_shuffle_epi32
+  // CHECK: @llvm.x86.avx512.mask.pshuf.d.256
+  return _mm256_mask_shuffle_epi32(__W, __U, __A, 2);
+}
+
+__m256i test_mm256_maskz_shuffle_epi32(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_shuffle_epi32
+  // CHECK: @llvm.x86.avx512.mask.pshuf.d.256
+  return _mm256_maskz_shuffle_epi32(__U, __A, 2);
+}
+
+
Index: test/CodeGen/avx512vlbw-builtins.c
===================================================================
--- test/CodeGen/avx512vlbw-builtins.c
+++ test/CodeGen/avx512vlbw-builtins.c
@@ -2053,3 +2053,171 @@
   return _mm256_maskz_loadu_epi8(__U, __P);
 }
 
+__m128i test_mm_dbsad_epu8(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.128
+  return _mm_dbsad_epu8(__A, __B, 170);
+}
+
+__m128i test_mm_mask_dbsad_epu8(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.128
+  return _mm_mask_dbsad_epu8(__W, __U, __A, __B, 170);
+}
+
+__m128i test_mm_maskz_dbsad_epu8(__mmask8 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.128
+  return _mm_maskz_dbsad_epu8(__U, __A, __B, 170);
+}
+
+__m256i test_mm256_dbsad_epu8(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.256
+  return _mm256_dbsad_epu8(__A, __B, 170);
+}
+
+__m256i test_mm256_mask_dbsad_epu8(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.256
+  return _mm256_mask_dbsad_epu8(__W, __U, __A, __B, 170);
+}
+
+__m256i test_mm256_maskz_dbsad_epu8(__mmask16 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_dbsad_epu8
+  // CHECK: @llvm.x86.avx512.mask.dbpsadbw.256
+  return _mm256_maskz_dbsad_epu8(__U, __A, __B, 170);
+}
+
+__m128i test_mm_mask_alignr_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.128
+  return _mm_mask_alignr_epi8(__W, __U, __A, __B, 2);
+}
+
+__m128i test_mm_maskz_alignr_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.128
+  return _mm_maskz_alignr_epi8(__U, __A, __B, 2);
+}
+
+__m256i test_mm256_mask_alignr_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.256
+  return _mm256_mask_alignr_epi8(__W, __U, __A, __B, 2);
+}
+
+__m256i test_mm256_maskz_alignr_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_alignr_epi8
+  // CHECK: @llvm.x86.avx512.mask.palignr.256
+  return _mm256_maskz_alignr_epi8(__U, __A, __B, 2);
+}
+
+__m128i test_mm_mask_broadcastb_epi8(__m128i __O, __mmask16 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_broadcastb_epi8
+  // CHECK: @llvm.x86.avx512.pbroadcastb.128
+  return _mm_mask_broadcastb_epi8(__O, __M, __A);
+}
+
+__m128i test_mm_maskz_broadcastb_epi8(__mmask16 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_broadcastb_epi8
+  // CHECK: @llvm.x86.avx512.pbroadcastb.128
+  return _mm_maskz_broadcastb_epi8(__M, __A);
+}
+
+__m256i test_mm256_mask_broadcastb_epi8(__m256i __O, __mmask32 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm256_mask_broadcastb_epi8
+  // CHECK: @llvm.x86.avx512.pbroadcastb.256
+  return _mm256_mask_broadcastb_epi8(__O, __M, __A);
+}
+
+__m256i test_mm256_maskz_broadcastb_epi8(__mmask32 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_broadcastb_epi8
+  // CHECK: @llvm.x86.avx512.pbroadcastb.256
+  return _mm256_maskz_broadcastb_epi8(__M, __A);
+}
+
+__m128i test_mm_mask_set1_epi16(__m128i __O, __mmask8 __M, short __A) {
+  // CHECK-LABEL: @test_mm_mask_set1_epi16
+  // CHECK: @llvm.x86.avx512.mask.pbroadcast.w.gpr.128
+  return _mm_mask_set1_epi16(__O, __M, __A);
+}
+
+__m128i test_mm_maskz_set1_epi16(__mmask8 __M, short __A) {
+  // CHECK-LABEL: @test_mm_maskz_set1_epi16
+  // CHECK: @llvm.x86.avx512.mask.pbroadcast.w.gpr.128
+  return _mm_maskz_set1_epi16(__M, __A);
+}
+
+__m128i test_mm_mask_broadcastw_epi16(__m128i __O, __mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_broadcastw_epi16
+  // CHECK: @llvm.x86.avx512.pbroadcastw.128
+  return _mm_mask_broadcastw_epi16(__O, __M, __A);
+}
+
+__m128i test_mm_maskz_broadcastw_epi16(__mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_broadcastw_epi16
+  // CHECK: @llvm.x86.avx512.pbroadcastw.128
+  return _mm_maskz_broadcastw_epi16(__M, __A);
+}
+
+__m256i test_mm256_mask_set1_epi16(__m256i __O, __mmask16 __M, short __A) {
+  // CHECK-LABEL: @test_mm256_mask_set1_epi16
+  // CHECK: @llvm.x86.avx512.mask.pbroadcast.w.gpr.256
+  return _mm256_mask_set1_epi16(__O, __M, __A);
+}
+
+__m256i test_mm256_maskz_set1_epi16(__mmask16 __M, short __A) {
+  // CHECK-LABEL: @test_mm256_maskz_set1_epi16
+  // CHECK: @llvm.x86.avx512.mask.pbroadcast.w.gpr.256
+  return _mm256_maskz_set1_epi16(__M, __A);
+}
+
+__m256i test_mm256_mask_broadcastw_epi16(__m256i __O, __mmask16 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm256_mask_broadcastw_epi16
+  // CHECK: @llvm.x86.avx512.pbroadcastw.256
+  return _mm256_mask_broadcastw_epi16(__O, __M, __A);
+}
+
+__m256i test_mm256_maskz_broadcastw_epi16(__mmask16 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_broadcastw_epi16
+  // CHECK: @llvm.x86.avx512.pbroadcastw.256
+  return _mm256_maskz_broadcastw_epi16(__M, __A);
+}
+
+__m128i test_mm_permutexvar_epi16(__m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_permutexvar_epi16
+  // CHECK: @llvm.x86.avx512.mask.permvar.hi.128
+  return _mm_permutexvar_epi16(__A, __B);
+}
+
+__m128i test_mm_maskz_permutexvar_epi16(__mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_maskz_permutexvar_epi16
+  // CHECK: @llvm.x86.avx512.mask.permvar.hi.128
+  return _mm_maskz_permutexvar_epi16(__M, __A, __B);
+}
+
+__m128i test_mm_mask_permutexvar_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm_mask_permutexvar_epi16
+  // CHECK: @llvm.x86.avx512.mask.permvar.hi.128
+  return _mm_mask_permutexvar_epi16(__W, __M, __A, __B);
+}
+
+__m256i test_mm256_permutexvar_epi16(__m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_permutexvar_epi16
+  // CHECK: @llvm.x86.avx512.mask.permvar.hi.256
+  return _mm256_permutexvar_epi16(__A, __B);
+}
+
+__m256i test_mm256_maskz_permutexvar_epi16(__mmask16 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_permutexvar_epi16
+  // CHECK: @llvm.x86.avx512.mask.permvar.hi.256
+  return _mm256_maskz_permutexvar_epi16(__M, __A, __B);
+}
+
+__m256i test_mm256_mask_permutexvar_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) {
+  // CHECK-LABEL: @test_mm256_mask_permutexvar_epi16
+  // CHECK: @llvm.x86.avx512.mask.permvar.hi.256
+  return _mm256_mask_permutexvar_epi16(__W, __M, __A, __B);
+}
+
Index: test/CodeGen/avx512vldq-builtins.c
===================================================================
--- test/CodeGen/avx512vldq-builtins.c
+++ test/CodeGen/avx512vldq-builtins.c
@@ -808,3 +808,214 @@
   // CHECK: @llvm.x86.avx512.mask.reduce.ps.256
   return _mm256_maskz_reduce_ps(__U, __A, 4);
 }
+
+__m256 test_mm256_broadcast_f32x2(__m128 __A) {
+  // CHECK-LABEL: @test_mm256_broadcast_f32x2
+  // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+  return _mm256_broadcast_f32x2(__A);
+}
+
+__m256 test_mm256_mask_broadcast_f32x2(__m256 __O, __mmask8 __M, __m128 __A) {
+  // CHECK-LABEL: @test_mm256_mask_broadcast_f32x2
+  // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+  return _mm256_mask_broadcast_f32x2(__O, __M, __A);
+}
+
+__m256 test_mm256_maskz_broadcast_f32x2(__mmask8 __M, __m128 __A) {
+  // CHECK-LABEL: @test_mm256_maskz_broadcast_f32x2
+  // CHECK: @llvm.x86.avx512.mask.broadcastf32x2
+  return _mm256_maskz_broadcast_f32x2(__M, __A);
+}
+
+__m256d test_mm256_broadcast_f64x2(__m128d __A) {
+  // CHECK-LABEL: @test_mm256_broadcast_f64x2
+  // CHECK: @llvm.x86.avx512.mask.broadcastf64x2
+  return _mm256_broadcast_f64x2(__A);
+}
+
+__m256d test_mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) {
+  // CHECK-LABEL: @test_mm256_mask_broadcast_f64x2
+  // CHECK: @llvm.x86.avx512.mask.broadcastf64x2
+  return _mm256_mask_broadcast_f64x2(__O, __M, __A);
+}
+
+__m256d test_mm256_maskz_broadcast_f64x2(__mmask8 __M, __m128d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_broadcast_f64x2
+  // CHECK: @llvm.x86.avx512.mask.broadcastf64x2
+  return _mm256_maskz_broadcast_f64x2(__M, __A);
+}
+
+__m128i test_mm_broadcast_i32x2(__m128i __A) {
+  // CHECK-LABEL: @test_mm_broadcast_i32x2
+  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  return _mm_broadcast_i32x2(__A);
+}
+
+__m128i test_mm_mask_broadcast_i32x2(__m128i __O, __mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_mask_broadcast_i32x2
+  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  return _mm_mask_broadcast_i32x2(__O, __M, __A);
+}
+
+__m128i test_mm_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm_maskz_broadcast_i32x2
+  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  return _mm_maskz_broadcast_i32x2(__M, __A);
+}
+
+__m256i test_mm256_broadcast_i32x2(__m128i __A) {
+  // CHECK-LABEL: @test_mm256_broadcast_i32x2
+  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  return _mm256_broadcast_i32x2(__A);
+}
+
+__m256i test_mm256_mask_broadcast_i32x2(__m256i __O, __mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm256_mask_broadcast_i32x2
+  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  return _mm256_mask_broadcast_i32x2(__O, __M, __A);
+}
+
+__m256i test_mm256_maskz_broadcast_i32x2(__mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_broadcast_i32x2
+  // CHECK: @llvm.x86.avx512.mask.broadcasti32x2
+  return _mm256_maskz_broadcast_i32x2(__M, __A);
+}
+
+__m256i test_mm256_broadcast_i64x2(__m128i __A) {
+  // CHECK-LABEL: @test_mm256_broadcast_i64x2
+  // CHECK: @llvm.x86.avx512.mask.broadcasti64x2
+  return _mm256_broadcast_i64x2(__A);
+}
+
+__m256i test_mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm256_mask_broadcast_i64x2
+  // CHECK: @llvm.x86.avx512.mask.broadcasti64x2
+  return _mm256_mask_broadcast_i64x2(__O, __M, __A);
+}
+
+__m256i test_mm256_maskz_broadcast_i64x2(__mmask8 __M, __m128i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_broadcast_i64x2
+  // CHECK: @llvm.x86.avx512.mask.broadcasti64x2
+  return _mm256_maskz_broadcast_i64x2(__M, __A);
+}
+
+__m128d test_mm256_extractf64x2_pd(__m256d __A) {
+  // CHECK-LABEL: @test_mm256_extractf64x2_pd
+  // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+  return _mm256_extractf64x2_pd(__A, 2);
+}
+
+__m128d test_mm256_mask_extractf64x2_pd(__m128d __W, __mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_extractf64x2_pd
+  // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+  return _mm256_mask_extractf64x2_pd(__W, __U, __A, 2);
+}
+
+__m128d test_mm256_maskz_extractf64x2_pd(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_maskz_extractf64x2_pd
+  // CHECK: @llvm.x86.avx512.mask.vextractf64x2
+  return _mm256_maskz_extractf64x2_pd(__U, __A, 2);
+}
+
+__m128i test_mm256_extracti64x2_epi64(__m256i __A) {
+  // CHECK-LABEL: @test_mm256_extracti64x2_epi64
+  // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+  return _mm256_extracti64x2_epi64(__A, 2);
+}
+
+__m128i test_mm256_mask_extracti64x2_epi64(__m128i __W, __mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_mask_extracti64x2_epi64
+  // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+  return _mm256_mask_extracti64x2_epi64(__W, __U, __A, 2);
+}
+
+__m128i test_mm256_maskz_extracti64x2_epi64(__mmask8 __U, __m256i __A) {
+  // CHECK-LABEL: @test_mm256_maskz_extracti64x2_epi64
+  // CHECK: @llvm.x86.avx512.mask.vextracti64x2
+  return _mm256_maskz_extracti64x2_epi64(__U, __A, 2);
+}
+
+__mmask8 test_mm_mask_fpclass_pd_mask(__mmask8 __U, __m128d __A) {
+  // CHECK-LABEL: @test_mm_mask_fpclass_pd_mask
+  // CHECK: @llvm.x86.avx512.mask.fpclass.pd.128
+  return _mm_mask_fpclass_pd_mask(__U, __A, 2);
+}
+
+__mmask8 test_mm_fpclass_pd_mask(__m128d __A) {
+  // CHECK-LABEL: @test_mm_fpclass_pd_mask
+  // CHECK: @llvm.x86.avx512.mask.fpclass.pd.128
+  return _mm_fpclass_pd_mask(__A, 2);
+}
+
+__mmask8 test_mm256_mask_fpclass_pd_mask(__mmask8 __U, __m256d __A) {
+  // CHECK-LABEL: @test_mm256_mask_fpclass_pd_mask
+  // CHECK: @llvm.x86.avx512.mask.fpclass.pd.256
+  return _mm256_mask_fpclass_pd_mask(__U, __A, 2);
+}
+
+__mmask8 test_mm256_fpclass_pd_mask(__m256d __A) {
+  // CHECK-LABEL: @test_mm256_fpclass_pd_mask
+  // CHECK: @llvm.x86.avx512.mask.fpclass.pd.256
+  return _mm256_fpclass_pd_mask(__A, 2);
+}
+
+__mmask8 test_mm_mask_fpclass_ps_mask(__mmask8 __U, __m128 __A) {
+  // CHECK-LABEL: @test_mm_mask_fpclass_ps_mask
+  // CHECK: @llvm.x86.avx512.mask.fpclass.ps.128
+  return _mm_mask_fpclass_ps_mask(__U, __A, 2);
+}
+
+__mmask8 test_mm_fpclass_ps_mask(__m128 __A) {
+  // CHECK-LABEL: @test_mm_fpclass_ps_mask
+  // CHECK: @llvm.x86.avx512.mask.fpclass.ps.128
+  return _mm_fpclass_ps_mask(__A, 2);
+}
+
+__mmask8 test_mm256_mask_fpclass_ps_mask(__mmask8 __U, __m256 __A) {
+  // CHECK-LABEL: @test_mm256_mask_fpclass_ps_mask
+  // CHECK: @llvm.x86.avx512.mask.fpclass.ps.256
+  return _mm256_mask_fpclass_ps_mask(__U, __A, 2);
+}
+
+__mmask8 test_mm256_fpclass_ps_mask(__m256 __A) {
+  // CHECK-LABEL: @test_mm256_fpclass_ps_mask
+  // CHECK: @llvm.x86.avx512.mask.fpclass.ps.256
+  return _mm256_fpclass_ps_mask(__A, 2);
+}
+
+__m256d test_mm256_insertf64x2(__m256d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm256_insertf64x2
+  // CHECK: @llvm.x86.avx512.mask.insertf64x2
+  return _mm256_insertf64x2(__A, __B, 1);
+}
+
+__m256d test_mm256_mask_insertf64x2(__m256d __W, __mmask8 __U, __m256d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm256_mask_insertf64x2
+  // CHECK: @llvm.x86.avx512.mask.insertf64x2
+  return _mm256_mask_insertf64x2(__W, __U, __A, __B, 1);
+}
+
+__m256d test_mm256_maskz_insertf64x2(__mmask8 __U, __m256d __A, __m128d __B) {
+  // CHECK-LABEL: @test_mm256_maskz_insertf64x2
+  // CHECK: @llvm.x86.avx512.mask.insertf64x2
+  return _mm256_maskz_insertf64x2(__U, __A, __B, 1);
+}
+
+__m256i test_mm256_inserti64x2(__m256i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm256_inserti64x2
+  // CHECK: @llvm.x86.avx512.mask.inserti64x2
+  return _mm256_inserti64x2(__A, __B, 1);
+}
+
+__m256i test_mm256_mask_inserti64x2(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm256_mask_inserti64x2
+  // CHECK: @llvm.x86.avx512.mask.inserti64x2
+  return _mm256_mask_inserti64x2(__W, __U, __A, __B, 1);
+}
+
+__m256i test_mm256_maskz_inserti64x2(__mmask8 __U, __m256i __A, __m128i __B) {
+  // CHECK-LABEL: @test_mm256_maskz_inserti64x2
+  // CHECK: @llvm.x86.avx512.mask.inserti64x2
+  return _mm256_maskz_inserti64x2(__U, __A, __B, 1);
+}
+