Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -1943,6 +1943,13 @@ }]; } +def MinVectorWidth : InheritableAttr { + let Spellings = [Clang<"min_vector_width">]; + let Args = [UnsignedArgument<"VectorWidth">]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [Undocumented]; +} + def TransparentUnion : InheritableAttr { let Spellings = [GCC<"transparent_union">]; // let Subjects = SubjectList<[Record, TypedefName]>; Index: include/clang/Basic/Builtins.h =================================================================== --- include/clang/Basic/Builtins.h +++ include/clang/Basic/Builtins.h @@ -206,6 +206,8 @@ return getRecord(ID).Features; } + unsigned getRequiredVectorWidth(unsigned ID) const; + /// Return true if builtin ID belongs to AuxTarget. bool isAuxBuiltinID(unsigned ID) const { return ID >= (Builtin::FirstTSBuiltin + TSRecords.size()); Index: include/clang/Basic/Builtins.def =================================================================== --- include/clang/Basic/Builtins.def +++ include/clang/Basic/Builtins.def @@ -92,6 +92,7 @@ // e -> const, but only when -fno-math-errno // j -> returns_twice (like setjmp) // u -> arguments are not evaluated for their side-effects +// V:N: -> requires vectors of at least N bits to be legal // FIXME: gcc has nonnull #if defined(BUILTIN) && !defined(LIBBUILTIN) Index: include/clang/Basic/BuiltinsX86.def =================================================================== --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -36,9 +36,9 @@ // Undefined Values // -TARGET_BUILTIN(__builtin_ia32_undef128, "V2d", "nc", "") -TARGET_BUILTIN(__builtin_ia32_undef256, "V4d", "nc", "") -TARGET_BUILTIN(__builtin_ia32_undef512, "V8d", "nc", "") +TARGET_BUILTIN(__builtin_ia32_undef128, "V2d", "ncV:128:", "") +TARGET_BUILTIN(__builtin_ia32_undef256, "V4d", "ncV:256:", "") +TARGET_BUILTIN(__builtin_ia32_undef512, "V8d", "ncV:512:", "") // FLAGS // @@ -48,32 +48,32 @@ // 3DNow! // TARGET_BUILTIN(__builtin_ia32_femms, "v", "n", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pavgusb, "V8cV8cV8c", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pf2id, "V2iV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfacc, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfadd, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfcmpeq, "V2iV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfcmpge, "V2iV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfcmpgt, "V2iV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfmax, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfmin, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfmul, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrcp, "V2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrcpit1, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrcpit2, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrsqrt, "V2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrsqit1, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfsub, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfsubr, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pi2fd, "V2fV2i", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pmulhrw, "V4sV4sV4s", "nc", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pavgusb, "V8cV8cV8c", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pf2id, "V2iV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfacc, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfadd, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfcmpeq, "V2iV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfcmpge, "V2iV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfcmpgt, "V2iV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfmax, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfmin, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfmul, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfrcp, "V2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfrcpit1, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfrcpit2, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfrsqrt, "V2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfrsqit1, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfsub, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfsubr, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pi2fd, "V2fV2i", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pmulhrw, "V4sV4sV4s", "ncV:64:", "3dnow") // 3DNow! Extensions (3dnowa). -TARGET_BUILTIN(__builtin_ia32_pf2iw, "V2iV2f", "nc", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pfnacc, "V2fV2fV2f", "nc", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pfpnacc, "V2fV2fV2f", "nc", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pi2fw, "V2fV2i", "nc", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pswapdsf, "V2fV2f", "nc", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pswapdsi, "V2iV2i", "nc", "3dnowa") +TARGET_BUILTIN(__builtin_ia32_pf2iw, "V2iV2f", "ncV:64:", "3dnowa") +TARGET_BUILTIN(__builtin_ia32_pfnacc, "V2fV2fV2f", "ncV:64:", "3dnowa") +TARGET_BUILTIN(__builtin_ia32_pfpnacc, "V2fV2fV2f", "ncV:64:", "3dnowa") +TARGET_BUILTIN(__builtin_ia32_pi2fw, "V2fV2i", "ncV:64:", "3dnowa") +TARGET_BUILTIN(__builtin_ia32_pswapdsf, "V2fV2f", "ncV:64:", "3dnowa") +TARGET_BUILTIN(__builtin_ia32_pswapdsi, "V2iV2i", "ncV:64:", "3dnowa") // MMX // @@ -85,254 +85,254 @@ // doesn't work in the presence of re-declaration of _mm_prefetch for windows. TARGET_BUILTIN(_mm_prefetch, "vcC*i", "nc", "mmx") TARGET_BUILTIN(__builtin_ia32_emms, "v", "n", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddd, "V2iV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddsb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddsw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddusb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddusw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubd, "V2iV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubsb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubsw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubusb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubusw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pmulhw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pmullw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pmaddwd, "V2iV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pand, "V1LLiV1LLiV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pandn, "V1LLiV1LLiV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_por, "V1LLiV1LLiV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pxor, "V1LLiV1LLiV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psllw, "V4sV4sV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pslld, "V2iV2iV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psllq, "V1LLiV1LLiV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrlw, "V4sV4sV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrld, "V2iV2iV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrlq, "V1LLiV1LLiV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psraw, "V4sV4sV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrad, "V2iV2iV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psllwi, "V4sV4si", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pslldi, "V2iV2ii", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psllqi, "V1LLiV1LLii", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrlwi, "V4sV4si", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrldi, "V2iV2ii", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrlqi, "V1LLiV1LLii", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrawi, "V4sV4si", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psradi, "V2iV2ii", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_packsswb, "V8cV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_packssdw, "V4sV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_packuswb, "V8cV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_punpckhbw, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_punpckhwd, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_punpckhdq, "V2iV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_punpcklbw, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_punpcklwd, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_punpckldq, "V2iV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pcmpeqb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pcmpeqw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pcmpeqd, "V2iV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pcmpgtb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pcmpgtw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pcmpgtd, "V2iV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_maskmovq, "vV8cV8cc*", "n", "mmx") -TARGET_BUILTIN(__builtin_ia32_movntq, "vV1LLi*V1LLi", "n", "mmx") -TARGET_BUILTIN(__builtin_ia32_vec_init_v2si, "V2iii", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_vec_init_v4hi, "V4sssss", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_vec_init_v8qi, "V8ccccccccc", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v2si, "iV2iIi", "nc", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddd, "V2iV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddsb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddsw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddusb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddusw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubd, "V2iV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubsb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubsw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubusb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubusw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pmulhw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pmullw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pmaddwd, "V2iV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pand, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pandn, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_por, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pxor, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psllw, "V4sV4sV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pslld, "V2iV2iV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psllq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrlw, "V4sV4sV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrld, "V2iV2iV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrlq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psraw, "V4sV4sV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrad, "V2iV2iV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psllwi, "V4sV4si", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pslldi, "V2iV2ii", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psllqi, "V1LLiV1LLii", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrlwi, "V4sV4si", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrldi, "V2iV2ii", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrlqi, "V1LLiV1LLii", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrawi, "V4sV4si", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psradi, "V2iV2ii", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_packsswb, "V8cV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_packssdw, "V4sV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_packuswb, "V8cV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_punpckhbw, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_punpckhwd, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_punpckhdq, "V2iV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_punpcklbw, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_punpcklwd, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_punpckldq, "V2iV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pcmpeqb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pcmpeqw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pcmpeqd, "V2iV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pcmpgtb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pcmpgtw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pcmpgtd, "V2iV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_maskmovq, "vV8cV8cc*", "nV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_movntq, "vV1LLi*V1LLi", "nV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_vec_init_v2si, "V2iii", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_vec_init_v4hi, "V4sssss", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_vec_init_v8qi, "V8ccccccccc", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v2si, "iV2ii", "ncV:64:", "mmx") // MMX2 (MMX+SSE) intrinsics -TARGET_BUILTIN(__builtin_ia32_cvtpi2ps, "V4fV4fV2i", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_cvtps2pi, "V2iV4f", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_cvttps2pi, "V2iV4f", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pavgb, "V8cV8cV8c", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pavgw, "V4sV4sV4s", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pmaxsw, "V4sV4sV4s", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pmaxub, "V8cV8cV8c", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pminsw, "V4sV4sV4s", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pminub, "V8cV8cV8c", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pshufw, "V4sV4sIc", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "iV4sIi", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "nc", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_cvtpi2ps, "V4fV4fV2i", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_cvtps2pi, "V2iV4f", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_cvttps2pi, "V2iV4f", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pavgb, "V8cV8cV8c", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pavgw, "V4sV4sV4s", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pmaxsw, "V4sV4sV4s", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pmaxub, "V8cV8cV8c", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pminsw, "V4sV4sV4s", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pminub, "V8cV8cV8c", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pshufw, "V4sV4sIc", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "iV4sIi", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "ncV:64:", "mmx,sse") // MMX+SSE2 -TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "nc", "mmx,sse2") -TARGET_BUILTIN(__builtin_ia32_cvtpi2pd, "V2dV2i", "nc", "mmx,sse2") -TARGET_BUILTIN(__builtin_ia32_cvttpd2pi, "V2iV2d", "nc", "mmx,sse2") -TARGET_BUILTIN(__builtin_ia32_paddq, "V1LLiV1LLiV1LLi", "nc", "mmx,sse2") -TARGET_BUILTIN(__builtin_ia32_pmuludq, "V1LLiV2iV2i", "nc", "mmx,sse2") -TARGET_BUILTIN(__builtin_ia32_psubq, "V1LLiV1LLiV1LLi", "nc", "mmx,sse2") +TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "ncV:64:", "mmx,sse2") +TARGET_BUILTIN(__builtin_ia32_cvtpi2pd, "V2dV2i", "ncV:64:", "mmx,sse2") +TARGET_BUILTIN(__builtin_ia32_cvttpd2pi, "V2iV2d", "ncV:64:", "mmx,sse2") +TARGET_BUILTIN(__builtin_ia32_paddq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx,sse2") +TARGET_BUILTIN(__builtin_ia32_pmuludq, "V1LLiV2iV2i", "ncV:64:", "mmx,sse2") +TARGET_BUILTIN(__builtin_ia32_psubq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx,sse2") // MMX+SSSE3 -TARGET_BUILTIN(__builtin_ia32_pabsb, "V8cV8c", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsd, "V2iV2i", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsw, "V4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_palignr, "V8cV8cV8cIc", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_phaddd, "V2iV2iV2i", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_phaddsw, "V4sV4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_phaddw, "V4sV4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubd, "V2iV2iV2i", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubsw, "V4sV4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubw, "V4sV4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_pmaddubsw, "V8cV8cV8c", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_pmulhrsw, "V4sV4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_pshufb, "V8cV8cV8c", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_psignw, "V4sV4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_psignb, "V8cV8cV8c", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_psignd, "V2iV2iV2i", "nc", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsb, "V8cV8c", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsd, "V2iV2i", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsw, "V4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_palignr, "V8cV8cV8cIc", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_phaddd, "V2iV2iV2i", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_phaddsw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_phaddw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_phsubd, "V2iV2iV2i", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_phsubsw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_phsubw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_pmaddubsw, "V8cV8cV8c", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_pmulhrsw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_pshufb, "V8cV8cV8c", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_psignw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_psignb, "V8cV8cV8c", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_psignd, "V2iV2iV2i", "ncV:64:", "mmx,ssse3") // SSE intrinsics. -TARGET_BUILTIN(__builtin_ia32_comieq, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_comilt, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_comile, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_comigt, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_comige, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_comineq, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomieq, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomilt, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomile, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomigt, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomige, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomineq, "iV4fV4f", "nc", "sse") - -TARGET_BUILTIN(__builtin_ia32_comisdeq, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdlt, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdle, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdgt, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdge, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdneq, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdeq, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdlt, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdle, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdgt, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdge, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdneq, "iV2dV2d", "nc", "sse2") - -TARGET_BUILTIN(__builtin_ia32_cmpeqps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpltps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpleps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpunordps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpneqps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpnltps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpnleps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpordps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpeqss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpltss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpless, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpunordss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpneqss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpnltss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpnless, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpordss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "nc", "sse") - -TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmplepd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpunordpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpneqpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpnltpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpnlepd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpordpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpeqsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpltsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmplesd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpunordsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_maxsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_paddsb128, "V16cV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_paddsw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psubsb128, "V16cV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psubsw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_paddusb128, "V16cV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_paddusw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psubusb128, "V16cV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pminsw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmulhuw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "LLiV2LLiIi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v4si, "iV4iIi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v4sf, "fV4fIi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v8hi, "sV8sIi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_set_v8hi, "V8sV8ssIi", "nc", "sse2") - -TARGET_BUILTIN(__builtin_ia32_addsubps, "V4fV4fV4f", "nc", "sse3") -TARGET_BUILTIN(__builtin_ia32_addsubpd, "V2dV2dV2d", "nc", "sse3") -TARGET_BUILTIN(__builtin_ia32_haddps, "V4fV4fV4f", "nc", "sse3") -TARGET_BUILTIN(__builtin_ia32_haddpd, "V2dV2dV2d", "nc", "sse3") -TARGET_BUILTIN(__builtin_ia32_hsubps, "V4fV4fV4f", "nc", "sse3") -TARGET_BUILTIN(__builtin_ia32_hsubpd, "V2dV2dV2d", "nc", "sse3") -TARGET_BUILTIN(__builtin_ia32_phaddw128, "V8sV8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phaddd128, "V4iV4iV4i", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phaddsw128, "V8sV8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubw128, "V8sV8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubd128, "V4iV4iV4i", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubsw128, "V8sV8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pmaddubsw128, "V8sV16cV16c", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pmulhrsw128, "V8sV8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "nc", "ssse3") +TARGET_BUILTIN(__builtin_ia32_comieq, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_comilt, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_comile, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_comigt, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_comige, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_comineq, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_ucomieq, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_ucomilt, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_ucomile, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_ucomigt, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_ucomige, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_ucomineq, "iV4fV4f", "ncV:128:", "sse") + +TARGET_BUILTIN(__builtin_ia32_comisdeq, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_comisdlt, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_comisdle, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_comisdgt, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_comisdge, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_comisdneq, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_ucomisdeq, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_ucomisdlt, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_ucomisdle, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_ucomisdgt, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_ucomisdge, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_ucomisdneq, "iV2dV2d", "ncV:128:", "sse2") + +TARGET_BUILTIN(__builtin_ia32_cmpeqps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpltps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpleps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpunordps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpneqps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpnltps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpnleps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpordps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpeqss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpltss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpless, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpunordss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpneqss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpnltss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpnless, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpordss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "ncV:128:", "sse") + +TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmplepd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpunordpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpneqpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpnltpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpnlepd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpordpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpeqsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpltsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmplesd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpunordsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_maxsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_paddsb128, "V16cV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_paddsw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psubsb128, "V16cV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psubsw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_paddusb128, "V16cV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_paddusw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psubusb128, "V16cV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pminsw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmulhuw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "LLiV2LLiIi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v4si, "iV4iIi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v4sf, "fV4fIi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v8hi, "sV8sIi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_set_v8hi, "V8sV8ssIi", "ncV:128:", "sse2") + +TARGET_BUILTIN(__builtin_ia32_addsubps, "V4fV4fV4f", "ncV:128:", "sse3") +TARGET_BUILTIN(__builtin_ia32_addsubpd, "V2dV2dV2d", "ncV:128:", "sse3") +TARGET_BUILTIN(__builtin_ia32_haddps, "V4fV4fV4f", "ncV:128:", "sse3") +TARGET_BUILTIN(__builtin_ia32_haddpd, "V2dV2dV2d", "ncV:128:", "sse3") +TARGET_BUILTIN(__builtin_ia32_hsubps, "V4fV4fV4f", "ncV:128:", "sse3") +TARGET_BUILTIN(__builtin_ia32_hsubpd, "V2dV2dV2d", "ncV:128:", "sse3") +TARGET_BUILTIN(__builtin_ia32_phaddw128, "V8sV8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_phaddd128, "V4iV4iV4i", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_phaddsw128, "V8sV8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_phsubw128, "V8sV8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_phsubd128, "V4iV4iV4i", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_phsubsw128, "V8sV8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pmaddubsw128, "V8sV16cV16c", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pmulhrsw128, "V8sV8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse") TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh","xmmintrin.h", ALL_LANGUAGES, "sse") TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "n", "sse") TARGET_HEADER_BUILTIN(_mm_getcsr, "Ui", "nh", "xmmintrin.h", ALL_LANGUAGES, "sse") -TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "n", "sse") -TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "n", "sse") -TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "n", "sse") +TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "nV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "nV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "nV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_sfence, "v", "n", "sse") TARGET_HEADER_BUILTIN(_mm_sfence, "v", "nh", "xmmintrin.h", ALL_LANGUAGES, "sse") -TARGET_BUILTIN(__builtin_ia32_rcpps, "V4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_rcpss, "V4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_rsqrtps, "V4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_rsqrtss, "V4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "nc", "sse") - -TARGET_BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "n", "sse2") -TARGET_BUILTIN(__builtin_ia32_movmskpd, "iV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_rcpps, "V4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_rcpss, "V4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_rsqrtps, "V4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_rsqrtss, "V4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "ncV:128:", "sse") + +TARGET_BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "nV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_movmskpd, "iV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_movnti, "vi*i", "n", "sse2") -TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "n", "sse2") TARGET_HEADER_BUILTIN(_mm_clflush, "vvC*", "nh", "emmintrin.h", ALL_LANGUAGES, "sse2") TARGET_BUILTIN(__builtin_ia32_lfence, "v", "n", "sse2") @@ -341,314 +341,316 @@ TARGET_HEADER_BUILTIN(_mm_mfence, "v", "nh", "emmintrin.h", ALL_LANGUAGES, "sse2") TARGET_BUILTIN(__builtin_ia32_pause, "v", "n", "") TARGET_HEADER_BUILTIN(_mm_pause, "v", "nh", "emmintrin.h", ALL_LANGUAGES, "") -TARGET_BUILTIN(__builtin_ia32_pmuludq128, "V2LLiV4iV4i", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psraw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrad128, "V4iV4iV4i", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrlw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrld128, "V4iV4iV4i", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrlq128, "V2LLiV2LLiV2LLi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psllw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pslld128, "V4iV4iV4i", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psllq128, "V2LLiV2LLiV2LLi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psllwi128, "V8sV8si", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pslldi128, "V4iV4ii", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psllqi128, "V2LLiV2LLii", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrlwi128, "V8sV8si", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrldi128, "V4iV4ii", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrlqi128, "V2LLiV2LLii", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrawi128, "V8sV8si", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psradi128, "V4iV4ii", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmaddwd128, "V4iV8sV8s", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmuludq128, "V2LLiV4iV4i", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psraw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrad128, "V4iV4iV4i", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrlw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrld128, "V4iV4iV4i", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrlq128, "V2LLiV2LLiV2LLi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psllw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pslld128, "V4iV4iV4i", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psllq128, "V2LLiV2LLiV2LLi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psllwi128, "V8sV8si", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pslldi128, "V4iV4ii", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psllqi128, "V2LLiV2LLii", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrlwi128, "V8sV8si", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrldi128, "V4iV4ii", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrlqi128, "V2LLiV2LLii", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrawi128, "V8sV8si", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psradi128, "V4iV4ii", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmaddwd128, "V4iV8sV8s", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_monitor, "vv*UiUi", "n", "sse3") TARGET_BUILTIN(__builtin_ia32_mwait, "vUiUi", "n", "sse3") -TARGET_BUILTIN(__builtin_ia32_lddqu, "V16ccC*", "n", "sse3") - -TARGET_BUILTIN(__builtin_ia32_palignr128, "V16cV16cV16cIi", "nc", "ssse3") - -TARGET_BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fIc", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "nc", "sse4.1") - -TARGET_BUILTIN(__builtin_ia32_pmaxsb128, "V16cV16cV16c", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxsd128, "V4iV4iV4i", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxud128, "V4iV4iV4i", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxuw128, "V8sV8sV8s", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminsb128, "V16cV16cV16c", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2LLiV4iV4i", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_roundsd, "V2dV2dV2dIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_roundpd, "V2dV2dIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_dpps, "V4fV4fV4fIc", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_dppd, "V2dV2dV2dIc", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_ptestz128, "iV2LLiV2LLi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16cIc", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_phminposuw128, "V8sV8s", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v16qi, "cV16cIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_vec_set_v16qi, "V16cV16ccIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_vec_set_v4si, "V4iV4iiIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2LLiV2LLiLLiIi", "nc", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_lddqu, "V16ccC*", "nV:128:", "sse3") + +TARGET_BUILTIN(__builtin_ia32_palignr128, "V16cV16cV16cIi", "ncV:128:", "ssse3") + +TARGET_BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fIc", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "ncV:128:", "sse4.1") + +TARGET_BUILTIN(__builtin_ia32_pmaxsb128, "V16cV16cV16c", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pmaxsd128, "V4iV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pmaxud128, "V4iV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pmaxuw128, "V8sV8sV8s", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pminsb128, "V16cV16cV16c", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2LLiV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_roundsd, "V2dV2dV2dIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_roundpd, "V2dV2dIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_dpps, "V4fV4fV4fIc", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_dppd, "V2dV2dV2dIc", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_ptestz128, "iV2LLiV2LLi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16cIc", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_phminposuw128, "V8sV8s", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v16qi, "cV16cIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_vec_set_v16qi, "V16cV16ccIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_vec_set_v4si, "V4iV4iiIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2LLiV2LLiLLiIi", "ncV:128:", "sse4.1") // SSE 4.2 -TARGET_BUILTIN(__builtin_ia32_pcmpistrm128, "V16cV16cV16cIc", "nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpistri128, "iV16cV16cIc", "nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestrm128, "V16cV16ciV16ciIc", "nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestri128, "iV16ciV16ciIc","nc", "sse4.2") - -TARGET_BUILTIN(__builtin_ia32_pcmpistria128, "iV16cV16cIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpistric128, "iV16cV16cIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpistrio128, "iV16cV16cIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpistris128, "iV16cV16cIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpistriz128, "iV16cV16cIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestria128, "iV16ciV16ciIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestric128, "iV16ciV16ciIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestrio128, "iV16ciV16ciIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","nc", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpistrm128, "V16cV16cV16cIc", "ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpistri128, "iV16cV16cIc", "ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestrm128, "V16cV16ciV16ciIc", "ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestri128, "iV16ciV16ciIc","ncV:128:", "sse4.2") + +TARGET_BUILTIN(__builtin_ia32_pcmpistria128, "iV16cV16cIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpistric128, "iV16cV16cIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpistrio128, "iV16cV16cIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpistris128, "iV16cV16cIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpistriz128, "iV16cV16cIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestria128, "iV16ciV16ciIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestric128, "iV16ciV16ciIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestrio128, "iV16ciV16ciIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","ncV:128:", "sse4.2") TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "sse4.2") TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "sse4.2") TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "sse4.2") // SSE4a -TARGET_BUILTIN(__builtin_ia32_extrqi, "V2LLiV2LLiIcIc", "nc", "sse4a") -TARGET_BUILTIN(__builtin_ia32_extrq, "V2LLiV2LLiV16c", "nc", "sse4a") -TARGET_BUILTIN(__builtin_ia32_insertqi, "V2LLiV2LLiV2LLiIcIc", "nc", "sse4a") -TARGET_BUILTIN(__builtin_ia32_insertq, "V2LLiV2LLiV2LLi", "nc", "sse4a") -TARGET_BUILTIN(__builtin_ia32_movntsd, "vd*V2d", "n", "sse4a") -TARGET_BUILTIN(__builtin_ia32_movntss, "vf*V4f", "n", "sse4a") +TARGET_BUILTIN(__builtin_ia32_extrqi, "V2LLiV2LLiIcIc", "ncV:128:", "sse4a") +TARGET_BUILTIN(__builtin_ia32_extrq, "V2LLiV2LLiV16c", "ncV:128:", "sse4a") +TARGET_BUILTIN(__builtin_ia32_insertqi, "V2LLiV2LLiV2LLiIcIc", "ncV:128:", "sse4a") +TARGET_BUILTIN(__builtin_ia32_insertq, "V2LLiV2LLiV2LLi", "ncV:128:", "sse4a") +TARGET_BUILTIN(__builtin_ia32_movntsd, "vd*V2d", "nV:128:", "sse4a") +TARGET_BUILTIN(__builtin_ia32_movntss, "vf*V4f", "nV:128:", "sse4a") // AES -TARGET_BUILTIN(__builtin_ia32_aesenc128, "V2LLiV2LLiV2LLi", "nc", "aes") -TARGET_BUILTIN(__builtin_ia32_aesenclast128, "V2LLiV2LLiV2LLi", "nc", "aes") -TARGET_BUILTIN(__builtin_ia32_aesdec128, "V2LLiV2LLiV2LLi", "nc", "aes") -TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2LLiV2LLiV2LLi", "nc", "aes") -TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "nc", "aes") -TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLiIc", "nc", "aes") +TARGET_BUILTIN(__builtin_ia32_aesenc128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes") +TARGET_BUILTIN(__builtin_ia32_aesenclast128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes") +TARGET_BUILTIN(__builtin_ia32_aesdec128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes") +TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes") +TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "ncV:128:", "aes") +TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLiIc", "ncV:128:", "aes") // VAES -TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4LLiV4LLiV4LLi", "nc", "vaes") -TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8LLiV8LLiV8LLi", "nc", "avx512f,vaes") -TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4LLiV4LLiV4LLi", "nc", "vaes") -TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8LLiV8LLiV8LLi", "nc", "avx512f,vaes") -TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4LLiV4LLiV4LLi", "nc", "vaes") -TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8LLiV8LLiV8LLi", "nc", "avx512f,vaes") -TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "nc", "vaes") -TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "nc", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes") // GFNI -TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "nc", "gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "nc", "avx,gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "nc", "avx512bw,gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "nc", "gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "nc", "avx,gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "nc", "avx512bw,gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "nc", "gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "nc", "avx,gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "nc", "avx512bw,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512bw,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512bw,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "ncV:128:", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "ncV:256:", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "ncV:512:", "avx512bw,gfni") // CLMUL -TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "nc", "pclmul") +TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "ncV:128:", "pclmul") // VPCLMULQDQ -TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4LLiV4LLiV4LLiIc", "nc", "vpclmulqdq") -TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8LLiV8LLiV8LLiIc", "nc", "avx512f,vpclmulqdq") +TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4LLiV4LLiV4LLiIc", "ncV:256:", "vpclmulqdq") +TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8LLiV8LLiV8LLiIc", "ncV:512:", "avx512f,vpclmulqdq") // AVX -TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_haddpd256, "V4dV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_hsubps256, "V8fV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_hsubpd256, "V4dV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_haddps256, "V8fV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_maxpd256, "V4dV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_maxps256, "V8fV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_minpd256, "V4dV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_minps256, "V8fV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vpermilvarpd, "V2dV2dV2LLi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vpermilvarps, "V4fV4fV4i", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4LLi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vpermilvarps256, "V8fV8fV8i", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_roundpd256, "V4dV4dIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_roundps256, "V8fV8fIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestzpd, "iV2dV2d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestcpd, "iV2dV2d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestnzcpd, "iV2dV2d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestzps, "iV4fV4f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestcps, "iV4fV4f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestnzcps, "iV4fV4f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestzpd256, "iV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestcpd256, "iV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestnzcpd256, "iV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestzps256, "iV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestcps256, "iV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestnzcps256, "iV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_ptestz256, "iV4LLiV4LLi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_ptestc256, "iV4LLiV4LLi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_ptestnzc256, "iV4LLiV4LLi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_movmskpd256, "iV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_movmskps256, "iV8f", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_haddpd256, "V4dV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_hsubps256, "V8fV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_hsubpd256, "V4dV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_haddps256, "V8fV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_maxpd256, "V4dV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_maxps256, "V8fV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_minpd256, "V4dV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_minps256, "V8fV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vpermilvarpd, "V2dV2dV2LLi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vpermilvarps, "V4fV4fV4i", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4LLi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vpermilvarps256, "V8fV8fV8i", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIc", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIc", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIc", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_roundpd256, "V4dV4dIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_roundps256, "V8fV8fIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestzpd, "iV2dV2d", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestcpd, "iV2dV2d", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestnzcpd, "iV2dV2d", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestzps, "iV4fV4f", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestcps, "iV4fV4f", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestnzcps, "iV4fV4f", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestzpd256, "iV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestcpd256, "iV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestnzcpd256, "iV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestzps256, "iV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestcps256, "iV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestnzcps256, "iV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_ptestz256, "iV4LLiV4LLi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_ptestc256, "iV4LLiV4LLi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_ptestnzc256, "iV4LLiV4LLi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_movmskpd256, "iV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_movmskps256, "iV8f", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_vzeroall, "v", "n", "avx") TARGET_BUILTIN(__builtin_ia32_vzeroupper, "v", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2LLi", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4LLi", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskloadps256, "V8fV8fC*V8i", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2LLiV2d", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4iV4f", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4LLiV4d", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8iV8f", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v32qi, "cV32cIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v16hi, "sV16sIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v8si, "iV8iIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "LLiV4LLiIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_set_v32qi, "V32cV32ccIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_set_v16hi, "V16sV16ssIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4LLiV4LLiLLiIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vbroadcastf128_pd256, "V4dV2dC*", "nV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vbroadcastf128_ps256, "V8fV4fC*", "nV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "nV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2LLi", "nV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "nV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4LLi", "nV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskloadps256, "V8fV8fC*V8i", "nV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2LLiV2d", "nV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4iV4f", "nV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4LLiV4d", "nV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8iV8f", "nV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v32qi, "cV32cIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v16hi, "sV16sIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v8si, "iV8iIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "LLiV4LLiIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_set_v32qi, "V32cV32ccIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_set_v16hi, "V16sV16ssIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4LLiV4LLiLLiIi", "ncV:256:", "avx") // AVX2 -TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_packusdw256, "V16sV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_paddsb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_paddsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psubsb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psubsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_paddusb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_paddusw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psubusb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_phaddsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_phsubw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16sV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaddwd256, "V8iV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxub256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxuw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxud256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxsb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxsd256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminub256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminuw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminud256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminsb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmulhw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmuludq256, "V4LLiV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psadbw256, "V4LLiV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pshufb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psignb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psignw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psignd256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllwi256, "V16sV16si", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllw256, "V16sV16sV8s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pslldi256, "V8iV8ii", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pslld256, "V8iV8iV4i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllqi256, "V4LLiV4LLii", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllq256, "V4LLiV4LLiV2LLi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrawi256, "V16sV16si", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psraw256, "V16sV16sV8s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psradi256, "V8iV8ii", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrad256, "V8iV8iV4i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlwi256, "V16sV16si", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlw256, "V16sV16sV8s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrldi256, "V8iV8ii", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrld256, "V8iV8iV4i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4LLiV4LLii", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskloadq256, "V4LLiV4LLiC*V4LLi", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskloadd, "V4iV4iC*V4i", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskloadq, "V2LLiV2LLiC*V2LLi", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskstored256, "vV8i*V8iV8i", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskstoreq256, "vV4LLi*V4LLiV4LLi", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskstored, "vV4i*V4iV4i", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskstoreq, "vV2LLi*V2LLiV2LLi", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllv8si, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllv4si, "V4iV4iV4i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllv4di, "V4LLiV4LLiV4LLi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllv2di, "V2LLiV2LLiV2LLi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrav8si, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrav4si, "V4iV4iV4i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlv8si, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlv4si, "V4iV4iV4i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlv4di, "V4LLiV4LLiV4LLi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlv2di, "V2LLiV2LLiV2LLi", "nc", "avx2") +TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_packusdw256, "V16sV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_paddsb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_paddsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psubsb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psubsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_paddusb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_paddusw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psubusb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_phaddsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_phsubw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16sV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaddwd256, "V8iV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxub256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxuw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxud256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxsb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxsd256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminub256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminuw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminud256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminsb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmulhw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmuludq256, "V4LLiV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psadbw256, "V4LLiV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pshufb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psignb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psignw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psignd256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllwi256, "V16sV16si", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllw256, "V16sV16sV8s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pslldi256, "V8iV8ii", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pslld256, "V8iV8iV4i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllqi256, "V4LLiV4LLii", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllq256, "V4LLiV4LLiV2LLi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrawi256, "V16sV16si", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psraw256, "V16sV16sV8s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psradi256, "V8iV8ii", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrad256, "V8iV8iV4i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlwi256, "V16sV16si", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlw256, "V16sV16sV8s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrldi256, "V8iV8ii", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrld256, "V8iV8iV4i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4LLiV4LLii", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskloadq256, "V4LLiV4LLiC*V4LLi", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskloadd, "V4iV4iC*V4i", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskloadq, "V2LLiV2LLiC*V2LLi", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskstored256, "vV8i*V8iV8i", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskstoreq256, "vV4LLi*V4LLiV4LLi", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskstored, "vV4i*V4iV4i", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskstoreq, "vV2LLi*V2LLiV2LLi", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllv8si, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllv4si, "V4iV4iV4i", "ncV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllv4di, "V4LLiV4LLiV4LLi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllv2di, "V2LLiV2LLiV2LLi", "ncV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrav8si, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrav4si, "V4iV4iV4i", "ncV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlv8si, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlv4si, "V4iV4iV4i", "ncV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlv4di, "V4LLiV4LLiV4LLi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlv2di, "V2LLiV2LLiV2LLi", "ncV:128:", "avx2") // GATHER -TARGET_BUILTIN(__builtin_ia32_gatherd_pd, "V2dV2ddC*V4iV2dIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherd_pd256, "V4dV4ddC*V4iV4dIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_pd, "V2dV2ddC*V2LLiV2dIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_pd256, "V4dV4ddC*V4LLiV4dIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherd_ps, "V4fV4ffC*V4iV4fIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherd_ps256, "V8fV8ffC*V8iV8fIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_ps, "V4fV4ffC*V2LLiV4fIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_ps256, "V4fV4ffC*V4LLiV4fIc", "n", "avx2") - -TARGET_BUILTIN(__builtin_ia32_gatherd_q, "V2LLiV2LLiLLiC*V4iV2LLiIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherd_q256, "V4LLiV4LLiLLiC*V4iV4LLiIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_q, "V2LLiV2LLiLLiC*V2LLiV2LLiIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_q256, "V4LLiV4LLiLLiC*V4LLiV4LLiIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherd_d, "V4iV4iiC*V4iV4iIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherd_d256, "V8iV8iiC*V8iV8iIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_d, "V4iV4iiC*V2LLiV4iIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_d256, "V4iV4iiC*V4LLiV4iIc", "n", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_pd, "V2dV2ddC*V4iV2dIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_pd256, "V4dV4ddC*V4iV4dIc", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_pd, "V2dV2ddC*V2LLiV2dIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_pd256, "V4dV4ddC*V4LLiV4dIc", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_ps, "V4fV4ffC*V4iV4fIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_ps256, "V8fV8ffC*V8iV8fIc", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_ps, "V4fV4ffC*V2LLiV4fIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_ps256, "V4fV4ffC*V4LLiV4fIc", "nV:256:", "avx2") + +TARGET_BUILTIN(__builtin_ia32_gatherd_q, "V2LLiV2LLiLLiC*V4iV2LLiIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_q256, "V4LLiV4LLiLLiC*V4iV4LLiIc", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_q, "V2LLiV2LLiLLiC*V2LLiV2LLiIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_q256, "V4LLiV4LLiLLiC*V4LLiV4LLiIc", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_d, "V4iV4iiC*V4iV4iIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_d256, "V8iV8iiC*V8iV8iIc", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_d, "V4iV4iiC*V2LLiV4iIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_d256, "V4iV4iiC*V4LLiV4iIc", "nV:256:", "avx2") // F16C -TARGET_BUILTIN(__builtin_ia32_vcvtps2ph, "V8sV4fIi", "nc", "f16c") -TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256, "V8sV8fIi", "nc", "f16c") -TARGET_BUILTIN(__builtin_ia32_vcvtph2ps, "V4fV8s", "nc", "f16c") -TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256, "V8fV8s", "nc", "f16c") +TARGET_BUILTIN(__builtin_ia32_vcvtps2ph, "V8sV4fIi", "ncV:128:", "f16c") +TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256, "V8sV8fIi", "ncV:256:", "f16c") +TARGET_BUILTIN(__builtin_ia32_vcvtph2ps, "V4fV8s", "ncV:128:", "f16c") +TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256, "V8fV8s", "ncV:256:", "f16c") // RDRAND TARGET_BUILTIN(__builtin_ia32_rdrand16_step, "UiUs*", "n", "rdrnd") @@ -713,97 +715,97 @@ TARGET_BUILTIN(__builtin_ia32_lwpval32, "vUiUiUi", "n", "lwp") // SHA -TARGET_BUILTIN(__builtin_ia32_sha1rnds4, "V4iV4iV4iIc", "nc", "sha") -TARGET_BUILTIN(__builtin_ia32_sha1nexte, "V4iV4iV4i", "nc", "sha") -TARGET_BUILTIN(__builtin_ia32_sha1msg1, "V4iV4iV4i", "nc", "sha") -TARGET_BUILTIN(__builtin_ia32_sha1msg2, "V4iV4iV4i", "nc", "sha") -TARGET_BUILTIN(__builtin_ia32_sha256rnds2, "V4iV4iV4iV4i", "nc", "sha") -TARGET_BUILTIN(__builtin_ia32_sha256msg1, "V4iV4iV4i", "nc", "sha") -TARGET_BUILTIN(__builtin_ia32_sha256msg2, "V4iV4iV4i", "nc", "sha") +TARGET_BUILTIN(__builtin_ia32_sha1rnds4, "V4iV4iV4iIc", "ncV:128:", "sha") +TARGET_BUILTIN(__builtin_ia32_sha1nexte, "V4iV4iV4i", "ncV:128:", "sha") +TARGET_BUILTIN(__builtin_ia32_sha1msg1, "V4iV4iV4i", "ncV:128:", "sha") +TARGET_BUILTIN(__builtin_ia32_sha1msg2, "V4iV4iV4i", "ncV:128:", "sha") +TARGET_BUILTIN(__builtin_ia32_sha256rnds2, "V4iV4iV4iV4i", "ncV:128:", "sha") +TARGET_BUILTIN(__builtin_ia32_sha256msg1, "V4iV4iV4i", "ncV:128:", "sha") +TARGET_BUILTIN(__builtin_ia32_sha256msg2, "V4iV4iV4i", "ncV:128:", "sha") // FMA -TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "nc", "fma") -TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "nc", "fma") -TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "nc", "fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "nc", "fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "nc", "fma|fma4") - -TARGET_BUILTIN(__builtin_ia32_vfmaddpd512, "V8dV8dV8dV8dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddps512, "V16fV16fV16fV16fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512, "V8dV8dV8dV8dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512, "V16fV16fV16fV16fIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "ncV:128:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "ncV:128:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "ncV:128:", "fma") +TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "ncV:128:", "fma") +TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "ncV:128:", "fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "ncV:128:", "fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "ncV:128:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "ncV:128:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "ncV:256:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "ncV:256:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "ncV:256:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "ncV:256:", "fma|fma4") + +TARGET_BUILTIN(__builtin_ia32_vfmaddpd512, "V8dV8dV8dV8dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddps512, "V16fV16fV16fV16fIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512, "V8dV8dV8dV8dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512, "V16fV16fV16fV16fIi", "ncV:512:", "avx512f") // XOP -TARGET_BUILTIN(__builtin_ia32_vpmacssww, "V8sV8sV8sV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacsww, "V8sV8sV8sV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacsswd, "V4iV8sV8sV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacswd, "V4iV8sV8sV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacssdd, "V4iV4iV4iV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacsdd, "V4iV4iV4iV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacssdql, "V2LLiV4iV4iV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacsdql, "V2LLiV4iV4iV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacssdqh, "V2LLiV4iV4iV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacsdqh, "V2LLiV4iV4iV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmadcsswd, "V4iV8sV8sV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmadcswd, "V4iV8sV8sV4i", "nc", "xop") - -TARGET_BUILTIN(__builtin_ia32_vphaddbw, "V8sV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddbd, "V4iV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddbq, "V2LLiV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddwd, "V4iV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddwq, "V2LLiV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphadddq, "V2LLiV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddubw, "V8sV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddubd, "V4iV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddubq, "V2LLiV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphadduwd, "V4iV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphadduwq, "V2LLiV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddudq, "V2LLiV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphsubbw, "V8sV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphsubwd, "V4iV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphsubdq, "V2LLiV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpperm, "V16cV16cV16cV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotb, "V16cV16cV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotw, "V8sV8sV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotd, "V4iV4iV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotq, "V2LLiV2LLiV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotbi, "V16cV16cIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotwi, "V8sV8sIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotdi, "V4iV4iIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotqi, "V2LLiV2LLiIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshlb, "V16cV16cV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshlw, "V8sV8sV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshld, "V4iV4iV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshlq, "V2LLiV2LLiV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshab, "V16cV16cV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshaw, "V8sV8sV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshad, "V4iV4iV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshaq, "V2LLiV2LLiV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomub, "V16cV16cV16cIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomuw, "V8sV8sV8sIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomud, "V4iV4iV4iIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomuq, "V2LLiV2LLiV2LLiIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomb, "V16cV16cV16cIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomw, "V8sV8sV8sIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomd, "V4iV4iV4iIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomq, "V2LLiV2LLiV2LLiIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpermil2pd, "V2dV2dV2dV2LLiIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpermil2pd256, "V4dV4dV4dV4LLiIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpermil2ps, "V4fV4fV4fV4iIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpermil2ps256, "V8fV8fV8fV8iIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vfrczss, "V4fV4f", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vfrczsd, "V2dV2d", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vfrczps, "V4fV4f", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vfrczpd, "V2dV2d", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vfrczps256, "V8fV8f", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vfrczpd256, "V4dV4d", "nc", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacssww, "V8sV8sV8sV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacsww, "V8sV8sV8sV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacsswd, "V4iV8sV8sV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacswd, "V4iV8sV8sV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacssdd, "V4iV4iV4iV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacsdd, "V4iV4iV4iV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacssdql, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacsdql, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacssdqh, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacsdqh, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmadcsswd, "V4iV8sV8sV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmadcswd, "V4iV8sV8sV4i", "ncV:128:", "xop") + +TARGET_BUILTIN(__builtin_ia32_vphaddbw, "V8sV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddbd, "V4iV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddbq, "V2LLiV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddwd, "V4iV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddwq, "V2LLiV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphadddq, "V2LLiV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddubw, "V8sV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddubd, "V4iV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddubq, "V2LLiV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphadduwd, "V4iV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphadduwq, "V2LLiV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddudq, "V2LLiV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphsubbw, "V8sV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphsubwd, "V4iV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphsubdq, "V2LLiV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpperm, "V16cV16cV16cV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotb, "V16cV16cV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotw, "V8sV8sV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotd, "V4iV4iV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotq, "V2LLiV2LLiV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotbi, "V16cV16cIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotwi, "V8sV8sIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotdi, "V4iV4iIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotqi, "V2LLiV2LLiIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshlb, "V16cV16cV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshlw, "V8sV8sV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshld, "V4iV4iV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshlq, "V2LLiV2LLiV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshab, "V16cV16cV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshaw, "V8sV8sV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshad, "V4iV4iV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshaq, "V2LLiV2LLiV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomub, "V16cV16cV16cIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomuw, "V8sV8sV8sIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomud, "V4iV4iV4iIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomuq, "V2LLiV2LLiV2LLiIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomb, "V16cV16cV16cIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomw, "V8sV8sV8sIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomd, "V4iV4iV4iIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomq, "V2LLiV2LLiV2LLiIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpermil2pd, "V2dV2dV2dV2LLiIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpermil2pd256, "V4dV4dV4dV4LLiIc", "ncV:256:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpermil2ps, "V4fV4fV4fV4iIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpermil2ps256, "V8fV8fV8fV8iIc", "ncV:256:", "xop") +TARGET_BUILTIN(__builtin_ia32_vfrczss, "V4fV4f", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vfrczsd, "V2dV2d", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vfrczps, "V4fV4f", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vfrczpd, "V2dV2d", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vfrczps256, "V8fV8f", "ncV:256:", "xop") +TARGET_BUILTIN(__builtin_ia32_vfrczpd256, "V4dV4d", "ncV:256:", "xop") TARGET_BUILTIN(__builtin_ia32_xbegin, "i", "n", "rtm") TARGET_BUILTIN(__builtin_ia32_xend, "v", "n", "rtm") @@ -822,826 +824,826 @@ TARGET_BUILTIN(__builtin_ia32_wrpkru, "vUi", "n", "pku") // AVX-512 -TARGET_BUILTIN(__builtin_ia32_sqrtpd512_mask, "V8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_sqrtps512_mask, "V16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rsqrt14sd_mask, "V2dV2dV2dV2dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "nc", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "nc", "avx512er") - -TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "nc", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "nc", "avx512er") - -TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvttpd2udq512_mask, "V8iV8dV8iUcIi", "nc", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_cmpps512_mask, "UsV16fV16fIiUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmpps256_mask, "UcV8fV8fIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmpps128_mask, "UcV4fV4fIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmppd512_mask, "UcV8dV8dIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmppd256_mask, "UcV4dV4dIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmppd128_mask, "UcV2dV2dIiUc", "nc", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_rndscaleps_mask, "V16fV16fIiV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rndscalepd_mask, "V8dV8dIiV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtps2dq512_mask, "V16iV16fV16iUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtpd2dq512_mask, "V8iV8dV8iUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtps2udq512_mask, "V16iV16fV16iUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtpd2udq512_mask, "V8iV8dV8iUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_minps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_minpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_maxps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_maxpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminsd512, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminud512, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8LLiV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLiLLiC*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loadups512_mask, "V16ffC*V16fUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loadaps512_mask, "V16fV16fC*V16fUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loadupd512_mask, "V8ddC*V8dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loadapd512_mask, "V8dV8dC*V8dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storedqudi512_mask, "vLLi*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storedqusi512_mask, "vi*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storeupd512_mask, "vd*V8dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storeapd512_mask, "vV8d*V8dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "n", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "nc", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "nc", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "nc", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "nc", "avx512vnni") - -TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4ddC*V4LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div4di, "V4LLiV4LLiLLiC*V4LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div4sf, "V4fV4ffC*V2LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div4si, "V4iV4iiC*V2LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div8sf, "V4fV4ffC*V4LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div8si, "V4iV4iiC*V4LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv2df, "V2dV2ddC*V4iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv2di, "V2LLiV2LLiLLiC*V4iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv4df, "V4dV4ddC*V4iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv4di, "V4LLiV4LLiLLiC*V4iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv4sf, "V4fV4ffC*V4iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv4si, "V4iV4iiC*V4iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv8sf, "V8fV8ffC*V8iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv8si, "V8iV8iiC*V8iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8ddC*V8iUcIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16ffC*V16fUsIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8ddC*V8LLiUcIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gatherdiv16sf, "V8fV8ffC*V8LLiUcIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gathersiv8di, "V8LLiV8LLiLLiC*V8iUcIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gathersiv16si, "V16iV16iiC*V16iUsIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gatherdiv8di, "V8LLiV8LLiLLiC*V8LLiUcIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gatherdiv16si, "V8iV8iiC*V8LLiUcIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scattersiv8df, "vd*UcV8iV8dIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scattersiv16sf, "vf*UsV16iV16fIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scatterdiv8df, "vd*UcV8LLiV8dIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scatterdiv16sf, "vf*UcV8LLiV8fIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scattersiv8di, "vLLi*UcV8iV8LLiIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vi*UsV16iV16iIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scatterdiv8di, "vLLi*UcV8LLiV8LLiIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vi*UcV8LLiV8iIi", "n", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_gatherpfdpd, "vUcV8iLLiC*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_gatherpfdps, "vUsV16iiC*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_gatherpfqpd, "vUcV8LLiLLiC*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_gatherpfqps, "vUcV8LLiiC*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iLLi*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16ii*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiLLi*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLii*IiIi", "n", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_sqrtpd512_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_sqrtps512_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rsqrt14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er") + +TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er") + +TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvttpd2udq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_cmpps512_mask, "UsV16fV16fIiUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmpps256_mask, "UcV8fV8fIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmpps128_mask, "UcV4fV4fIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmppd512_mask, "UcV8dV8dIiUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmppd256_mask, "UcV4dV4dIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmppd128_mask, "UcV2dV2dIiUc", "ncV:128:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_rndscaleps_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rndscalepd_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtpd2udq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_minps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_minpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_maxps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_maxpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pminsd512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pminud512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8LLiV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLiLLiC*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadups512_mask, "V16ffC*V16fUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadaps512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadupd512_mask, "V8ddC*V8dUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadapd512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storedqudi512_mask, "vLLi*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storedqusi512_mask, "vi*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storeupd512_mask, "vd*V8dUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storeapd512_mask, "vV8d*V8dUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "nV:512:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni") + +TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4ddC*V4LLiUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div4di, "V4LLiV4LLiLLiC*V4LLiUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div4sf, "V4fV4ffC*V2LLiUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div4si, "V4iV4iiC*V2LLiUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div8sf, "V4fV4ffC*V4LLiUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div8si, "V4iV4iiC*V4LLiUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv2df, "V2dV2ddC*V4iUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv2di, "V2LLiV2LLiLLiC*V4iUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv4df, "V4dV4ddC*V4iUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv4di, "V4LLiV4LLiLLiC*V4iUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv4sf, "V4fV4ffC*V4iUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv4si, "V4iV4iiC*V4iUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv8sf, "V8fV8ffC*V8iUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv8si, "V8iV8iiC*V8iUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8ddC*V8iUcIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16ffC*V16fUsIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8ddC*V8LLiUcIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gatherdiv16sf, "V8fV8ffC*V8LLiUcIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gathersiv8di, "V8LLiV8LLiLLiC*V8iUcIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gathersiv16si, "V16iV16iiC*V16iUsIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gatherdiv8di, "V8LLiV8LLiLLiC*V8LLiUcIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gatherdiv16si, "V8iV8iiC*V8LLiUcIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scattersiv8df, "vd*UcV8iV8dIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scattersiv16sf, "vf*UsV16iV16fIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scatterdiv8df, "vd*UcV8LLiV8dIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scatterdiv16sf, "vf*UcV8LLiV8fIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scattersiv8di, "vLLi*UcV8iV8LLiIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vi*UsV16iV16iIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scatterdiv8di, "vLLi*UcV8LLiV8LLiIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vi*UcV8LLiV8iIi", "nV:512:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_gatherpfdpd, "vUcV8iLLiC*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_gatherpfdps, "vUsV16iiC*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_gatherpfqpd, "vUcV8LLiLLiC*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_gatherpfqps, "vUcV8LLiiC*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iLLi*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16ii*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiLLi*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLii*IiIi", "nV:512:", "avx512pf") TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmpq128_mask, "UcV2LLiV2LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmpw128_mask, "UcV8sV8sIiUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_cmpb256_mask, "UiV32cV32cIiUi", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_cmpd256_mask, "UcV8iV8iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmpq256_mask, "UcV4LLiV4LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmpw256_mask, "UsV16sV16sIiUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_cmpb512_mask, "ULLiV64cV64cIiULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_cmpd512_mask, "UsV16iV16iIiUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmpq512_mask, "UcV8LLiV8LLiIiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmpw512_mask, "UiV32sV32sIiUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_ucmpb128_mask, "UsV16cV16cIiUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_ucmpd128_mask, "UcV4iV4iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_ucmpq128_mask, "UcV2LLiV2LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_ucmpw128_mask, "UcV8sV8sIiUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_ucmpb256_mask, "UiV32cV32cIiUi", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_ucmpd256_mask, "UcV8iV8iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_ucmpq256_mask, "UcV4LLiV4LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_ucmpw256_mask, "UsV16sV16sIiUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_ucmpb512_mask, "ULLiV64cV64cIiULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8LLiV8LLiIiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "nc", "avx512bw") - -TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_paddsb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_paddsw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_paddusb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_paddusw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxuw512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminsb512, "V64cV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminsw512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psubsb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psubsw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psubusb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psubusw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw") - -TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128_mask, "V2LLiV2LLiV2LLiUc", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256_mask, "V4LLiV4LLiV4LLiUc", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpconflictsi_128_mask, "V4iV4iV4iUc", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpconflictsi_256_mask, "V8iV8iV8iUc", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpconflictdi_512_mask, "V8LLiV8LLiV8LLiUc", "nc", "avx512cd") -TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512_mask, "V16iV16iV16iUs", "nc", "avx512cd") -TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "nc", "avx512cd") -TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8LLiV8LLi", "nc", "avx512cd") - -TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "nc", "avx512vpopcntdq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2LLiV2LLi", "nc", "avx512vpopcntdq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpopcntd_256, "V8iV8i", "nc", "avx512vpopcntdq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "nc", "avx512vpopcntdq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "nc", "avx512vpopcntdq") -TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "nc", "avx512vpopcntdq") - -TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "nc", "avx512vl,avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "nc", "avx512vl,avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "nc", "avx512vl,avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "nc", "avx512vl,avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "nc", "avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "nc", "avx512bitalg") - -TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "nc", "avx512vl,avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "nc", "avx512vl,avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "nc", "avx512bitalg") - -TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "nc", "avx512bw") - -TARGET_BUILTIN(__builtin_ia32_addpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_addps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_divpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_divps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_mulpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_mulps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_subpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_subps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_pmaddubsw512, "V32sV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaddwd512, "V16iV32sV32s", "nc", "avx512bw") - -TARGET_BUILTIN(__builtin_ia32_addss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_divss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_mulss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_subss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_maxss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_minss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_addsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_divsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_mulsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_subsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_maxsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_minsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_compressdf128_mask, "V2dV2dV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressdf256_mask, "V4dV4dV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressdi128_mask, "V2LLiV2LLiV2LLiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressdi256_mask, "V4LLiV4LLiV4LLiUc", "nc", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_compresshi128_mask, "V8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compresshi256_mask, "V16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressqi128_mask, "V16cV16cV16cUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressqi256_mask, "V32cV32cV32cUi", "nc", "avx512vl,avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_compresssf128_mask, "V4fV4fV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compresssf256_mask, "V8fV8fV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compresssi128_mask, "V4iV4iV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compresssi256_mask, "V8iV8iV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoredf128_mask, "vV2d*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoredf256_mask, "vV4d*V4dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoredi128_mask, "vV2LLi*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoredi256_mask, "vV4LLi*V4LLiUc", "n", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_compressstorehi128_mask, "vV8s*V8sUc", "n", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressstorehi256_mask, "vV16s*V16sUs", "n", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressstoreqi128_mask, "vV16c*V16cUs", "n", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressstoreqi256_mask, "vV32c*V32cUi", "n", "avx512vl,avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_compressstoresf128_mask, "vV4f*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtpd2ps_mask, "V4fV2dV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtpd2udq128_mask, "V4iV2dV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtpd2udq256_mask, "V4iV4dV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtps2udq128_mask, "V4iV4fV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtps2udq256_mask, "V8iV8fV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvttpd2dq128_mask, "V4iV2dV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvttpd2udq128_mask, "V4iV2dV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvttpd2udq256_mask, "V4iV4dV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2LLiV2LLiV2LLiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expanddi256_mask, "V4LLiV4LLiV4LLiUc", "nc", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_expandhi128_mask, "V8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandhi256_mask, "V16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandqi128_mask, "V16cV16cV16cUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandqi256_mask, "V32cV32cV32cUi", "nc", "avx512vl,avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_expandloaddf128_mask, "V2dV2dC*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandloaddf256_mask, "V4dV4dC*V4dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandloaddi128_mask, "V4iV2LLiC*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandloaddi256_mask, "V4LLiV4LLiC*V4LLiUc", "n", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_expandloadhi128_mask, "V8sV8sC*V8sUc", "n", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandloadhi256_mask, "V16sV16sC*V16sUs", "n", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandloadqi128_mask, "V16cV16cC*V16cUs", "n", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandloadqi256_mask, "V32cV32cC*V32cUi", "n", "avx512vl,avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_expandloadsf128_mask, "V4fV4fC*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandloadsf256_mask, "V8fV8fC*V8fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandloadsi128_mask, "V4iV4iC*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandloadsi256_mask, "V8iV8iC*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandsf128_mask, "V4fV4fV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandsf256_mask, "V8fV8fV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandsi128_mask, "V4iV4iV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandsi256_mask, "V8iV8iV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rndscaleps_256_mask, "V8fV8fIiV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scalefpd128_mask, "V2dV2dV2dV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scalefpd256_mask, "V4dV4dV4dV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scalefps128_mask, "V4fV4fV4fV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scalefps256_mask, "V8fV8fV8fV8fUc", "nc", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_scatterdiv2df, "vd*UcV2LLiV2dIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv2di, "vLLi*UcV2LLiV2LLiIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv4df, "vd*UcV4LLiV4dIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv4di, "vLLi*UcV4LLiV4LLiIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv4sf, "vf*UcV2LLiV4fIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv4si, "vi*UcV2LLiV4iIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv8sf, "vf*UcV4LLiV4fIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv8si, "vi*UcV4LLiV4iIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv2df, "vd*UcV4iV2dIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv2di, "vLLi*UcV4iV2LLiIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv4df, "vd*UcV4iV4dIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv4di, "vLLi*UcV4iV4LLiIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv4sf, "vf*UcV4iV4fIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv4si, "vi*UcV4iV4iIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv8sf, "vf*UcV8iV8fIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv8si, "vi*UcV8iV8iIi", "n", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_vpermi2vard128, "V4iV4iV4iV4i", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2vard256, "V8iV8iV8iV8i", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2vard512, "V16iV16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermi2varpd128, "V2dV2dV2LLiV2d", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varpd256, "V4dV4dV4LLiV4d", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512, "V8dV8dV8LLiV8d", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermi2varps128, "V4fV4fV4iV4f", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varps256, "V8fV8fV8iV8f", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varps512, "V16fV16fV16iV16f", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermi2varq128, "V2LLiV2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varq256, "V4LLiV4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varq512, "V8LLiV8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermi2varqi128, "V16cV16cV16cV16c", "nc", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varqi256, "V32cV32cV32cV32c", "nc", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varqi512, "V64cV64cV64cV64c", "nc", "avx512vbmi") -TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128, "V8sV8sV8sV8s", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256, "V16sV16sV16sV16s", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_vpermi2varhi512, "V32sV32sV32sV32s", "nc", "avx512bw") - -TARGET_BUILTIN(__builtin_ia32_vpshldd128_mask, "V4iV4iV4iIiV4iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldd256_mask, "V8iV8iV8iIiV8iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldd512_mask, "V16iV16iV16iIiV16iUs", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldw128_mask, "V8sV8sV8sIiV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldw256_mask, "V16sV16sV16sIiV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldw512_mask, "V32sV32sV32sIiV32sUi", "nc", "avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_vpshldvd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvd512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvw128_mask, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvw256_mask, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvd128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvd256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvd512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvw128_maskz, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvw256_maskz, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvw512_maskz, "V32sV32sV32sV32sUi", "nc", "avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_mask, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_mask, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_maskz, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "nc", "avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iIiV4iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iIiV8iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iIiV16iUs", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8sIiV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16sIiV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32sIiV32sUi", "nc", "avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtps2qq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtps2qq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtps2uqq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtps2uqq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtqq2ps128_mask, "V4fV2LLiV4fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtqq2ps256_mask, "V4fV4LLiV4fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttpd2qq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttpd2qq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttps2qq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttps2qq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttps2uqq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttps2uqq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps128_mask, "V4fV2LLiV4fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps256_mask, "V4fV4LLiV4fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangepd128_mask, "V2dV2dV2dIiV2dUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangepd256_mask, "V4dV4dV4dIiV4dUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangeps128_mask, "V4fV4fV4fIiV4fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangeps256_mask, "V8fV8fV8fIiV8fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangesd128_round_mask, "V2dV2dV2dV2dUcIiIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangess128_round_mask, "V4fV4fV4fV4fUcIiIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_reducepd128_mask, "V2dV2dIiV2dUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_reducepd256_mask, "V4dV4dIiV4dUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_reduceps128_mask, "V4fV4fIiV4fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_reduceps256_mask, "V8fV8fIiV8fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_reducesd_mask, "V2dV2dV2dV2dUcIiIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_reducess_mask, "V4fV4fV4fV4fUcIiIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_pmovswb128_mask, "V16cV8sV16cUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovswb256_mask, "V16cV16sV16cUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovuswb128_mask, "V16cV8sV16cUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovuswb256_mask, "V16cV16sV16cUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovwb128_mask, "V16cV8sV16cUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8LLiV8fV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtqq2pd512_mask, "V8dV8LLiV8dUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtqq2ps512_mask, "V8fV8LLiV8fUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttps2qq512_mask, "V8LLiV8fV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd512_mask, "V8dV8LLiV8dUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps512_mask, "V8fV8LLiV8fUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangepd512_mask, "V8dV8dV8dIiV8dUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangeps512_mask, "V16fV16fV16fIiV16fUsIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_reducepd512_mask, "V8dV8dIiV8dUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_reduceps512_mask, "V16fV16fIiV16fUsIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_prold512_mask, "V16iV16iIiV16iUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prolq512_mask, "V8LLiV8LLiIiV8LLiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prold128_mask, "V4iV4iIiV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prold256_mask, "V8iV8iIiV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prolq128_mask, "V2LLiV2LLiIiV2LLiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prolq256_mask, "V4LLiV4LLiIiV4LLiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prolvd512_mask, "V16iV16iV16iV16iUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prolvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prord512_mask, "V16iV16iiV16iUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prorq512_mask, "V8LLiV8LLiiV8LLiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prolvd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prolvd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prolvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prolvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prord128_mask, "V4iV4iIiV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prord256_mask, "V8iV8iIiV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prorq128_mask, "V2LLiV2LLiIiV2LLiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prorq256_mask, "V4LLiV4LLiIiV4LLiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prorvd512_mask, "V16iV16iV16iV16iUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prorvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prorvd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prorvd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prorvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prorvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_psllv32hi, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psllw512, "V32sV32sV8s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psllwi512, "V32sV32si", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psllv16hi, "V16sV16sV16s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_psllv8hi, "V8sV8sV8s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_pslldi512, "V16iV16ii", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psllqi512, "V8LLiV8LLii", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrlv32hi, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrlv16hi, "V16sV16sV16s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_psrlv8hi, "V8sV8sV8s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_psrldi512, "V16iV16ii", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrlqi512, "V8LLiV8LLii", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrav32hi, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrav16hi, "V16sV16sV16s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_psrav8hi, "V8sV8sV8s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_psravq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_psravq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_psraw512, "V32sV32sV8s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_movdqa32store512_mask, "vV16i*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_movdqa64load512_mask, "V8LLiV8LLiC*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_movdqa64store512_mask, "vV8LLi*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_movdqa32store128_mask, "vV4i*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa32store256_mask, "vV8i*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2LLiV2LLiC*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8LLiV8LLiV8LLiV8LLi", "nc", "avx512ifma") -TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8LLiV8LLiV8LLiV8LLi", "nc", "avx512ifma") -TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2LLiV2LLiV2LLiV2LLi", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256, "V4LLiV4LLiV4LLiV4LLi", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2LLiV2LLiV2LLiV2LLi", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4LLiV4LLiV4LLiV4LLi", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmpq128_mask, "UcV2LLiV2LLiIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmpw128_mask, "UcV8sV8sIiUc", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_cmpb256_mask, "UiV32cV32cIiUi", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_cmpd256_mask, "UcV8iV8iIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmpq256_mask, "UcV4LLiV4LLiIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmpw256_mask, "UsV16sV16sIiUs", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_cmpb512_mask, "ULLiV64cV64cIiULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_cmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmpq512_mask, "UcV8LLiV8LLiIiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_ucmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_ucmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_ucmpq128_mask, "UcV2LLiV2LLiIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_ucmpw128_mask, "UcV8sV8sIiUc", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_ucmpb256_mask, "UiV32cV32cIiUi", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_ucmpd256_mask, "UcV8iV8iIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_ucmpq256_mask, "UcV4LLiV4LLiIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_ucmpw256_mask, "UsV16sV16sIiUs", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_ucmpb512_mask, "ULLiV64cV64cIiULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8LLiV8LLiIiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw") + +TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_paddsb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_paddsw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_paddusb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_paddusw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaxuw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pminsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pminsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psubsb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psubsw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psubusb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psubusw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") + +TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128_mask, "V2LLiV2LLiV2LLiUc", "ncV:128:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256_mask, "V4LLiV4LLiV4LLiUc", "ncV:256:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpconflictsi_128_mask, "V4iV4iV4iUc", "ncV:128:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpconflictsi_256_mask, "V8iV8iV8iUc", "ncV:256:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpconflictdi_512_mask, "V8LLiV8LLiV8LLiUc", "ncV:512:", "avx512cd") +TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512cd") +TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "ncV:512:", "avx512cd") +TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8LLiV8LLi", "ncV:512:", "avx512cd") + +TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "ncV:128:", "avx512vpopcntdq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2LLiV2LLi", "ncV:128:", "avx512vpopcntdq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpopcntd_256, "V8iV8i", "ncV:256:", "avx512vpopcntdq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "ncV:256:", "avx512vpopcntdq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "ncV:512:", "avx512vpopcntdq") +TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "ncV:512:", "avx512vpopcntdq") + +TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "ncV:128:", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "ncV:128:", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "ncV:256:", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "ncV:256:", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "ncV:512:", "avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "ncV:512:", "avx512bitalg") + +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "ncV:128:", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "ncV:256:", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "ncV:512:", "avx512bitalg") + +TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "ncV:512:", "avx512bw") + +TARGET_BUILTIN(__builtin_ia32_addpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_addps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_divpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_divps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_mulpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_mulps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_subpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_subps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_pmaddubsw512, "V32sV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaddwd512, "V16iV32sV32s", "ncV:512:", "avx512bw") + +TARGET_BUILTIN(__builtin_ia32_addss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_divss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_mulss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_subss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_maxss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_minss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_addsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_divsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_mulsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_subsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_maxsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_minsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_compressdf128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressdf256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressdi128_mask, "V2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressdi256_mask, "V4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_compresshi128_mask, "V8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compresshi256_mask, "V16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressqi128_mask, "V16cV16cV16cUs", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressqi256_mask, "V32cV32cV32cUi", "ncV:256:", "avx512vl,avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_compresssf128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compresssf256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compresssi128_mask, "V4iV4iV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compresssi256_mask, "V8iV8iV8iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoredf128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoredf256_mask, "vV4d*V4dUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoredi128_mask, "vV2LLi*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoredi256_mask, "vV4LLi*V4LLiUc", "nV:256:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_compressstorehi128_mask, "vV8s*V8sUc", "nV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstorehi256_mask, "vV16s*V16sUs", "nV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoreqi128_mask, "vV16c*V16cUs", "nV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoreqi256_mask, "vV32c*V32cUi", "nV:256:", "avx512vl,avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_compressstoresf128_mask, "vV4f*V4fUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtpd2ps_mask, "V4fV2dV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtpd2udq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtpd2udq256_mask, "V4iV4dV4iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtps2udq128_mask, "V4iV4fV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtps2udq256_mask, "V8iV8fV8iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvttpd2dq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvttpd2udq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvttpd2udq256_mask, "V4iV4dV4iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expanddi256_mask, "V4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_expandhi128_mask, "V8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandhi256_mask, "V16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandqi128_mask, "V16cV16cV16cUs", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandqi256_mask, "V32cV32cV32cUi", "ncV:256:", "avx512vl,avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_expandloaddf128_mask, "V2dV2dC*V2dUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandloaddf256_mask, "V4dV4dC*V4dUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandloaddi128_mask, "V4iV2LLiC*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandloaddi256_mask, "V4LLiV4LLiC*V4LLiUc", "nV:256:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_expandloadhi128_mask, "V8sV8sC*V8sUc", "nV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadhi256_mask, "V16sV16sC*V16sUs", "nV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadqi128_mask, "V16cV16cC*V16cUs", "nV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadqi256_mask, "V32cV32cC*V32cUi", "nV:256:", "avx512vl,avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_expandloadsf128_mask, "V4fV4fC*V4fUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandloadsf256_mask, "V8fV8fC*V8fUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandloadsi128_mask, "V4iV4iC*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandloadsi256_mask, "V8iV8iC*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandsf128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandsf256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandsi128_mask, "V4iV4iV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandsi256_mask, "V8iV8iV8iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rndscaleps_256_mask, "V8fV8fIiV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scalefpd128_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scalefpd256_mask, "V4dV4dV4dV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scalefps128_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scalefps256_mask, "V8fV8fV8fV8fUc", "ncV:256:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_scatterdiv2df, "vd*UcV2LLiV2dIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv2di, "vLLi*UcV2LLiV2LLiIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv4df, "vd*UcV4LLiV4dIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv4di, "vLLi*UcV4LLiV4LLiIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv4sf, "vf*UcV2LLiV4fIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv4si, "vi*UcV2LLiV4iIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv8sf, "vf*UcV4LLiV4fIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv8si, "vi*UcV4LLiV4iIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv2df, "vd*UcV4iV2dIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv2di, "vLLi*UcV4iV2LLiIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv4df, "vd*UcV4iV4dIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv4di, "vLLi*UcV4iV4LLiIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv4sf, "vf*UcV4iV4fIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv4si, "vi*UcV4iV4iIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv8sf, "vf*UcV8iV8fIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv8si, "vi*UcV8iV8iIi", "nV:256:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_vpermi2vard128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2vard256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2vard512, "V16iV16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermi2varpd128, "V2dV2dV2LLiV2d", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varpd256, "V4dV4dV4LLiV4d", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512, "V8dV8dV8LLiV8d", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermi2varps128, "V4fV4fV4iV4f", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varps256, "V8fV8fV8iV8f", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varps512, "V16fV16fV16iV16f", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermi2varq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermi2varqi128, "V16cV16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varqi256, "V32cV32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varqi512, "V64cV64cV64cV64c", "ncV:512:", "avx512vbmi") +TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128, "V8sV8sV8sV8s", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256, "V16sV16sV16sV16s", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_vpermi2varhi512, "V32sV32sV32sV32s", "ncV:512:", "avx512bw") + +TARGET_BUILTIN(__builtin_ia32_vpshldd128_mask, "V4iV4iV4iIiV4iUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldd256_mask, "V8iV8iV8iIiV8iUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldd512_mask, "V16iV16iV16iIiV16iUs", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw128_mask, "V8sV8sV8sIiV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw256_mask, "V16sV16sV16sIiV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw512_mask, "V32sV32sV32sIiV32sUi", "ncV:512:", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_vpshldvd128_mask, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd256_mask, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd512_mask, "V16iV16iV16iV16iUs", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw128_mask, "V8sV8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw256_mask, "V16sV16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd128_maskz, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd256_maskz, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd512_maskz, "V16iV16iV16iV16iUs", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw128_maskz, "V8sV8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw256_maskz, "V16sV16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw512_maskz, "V32sV32sV32sV32sUi", "ncV:512:", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_mask, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_mask, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_mask, "V16iV16iV16iV16iUs", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_mask, "V8sV8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_mask, "V16sV16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_maskz, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_maskz, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_maskz, "V16iV16iV16iV16iUs", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_maskz, "V8sV8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "ncV:512:", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_vpshrdd128_mask, "V4iV4iV4iIiV4iUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd256_mask, "V8iV8iV8iIiV8iUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd512_mask, "V16iV16iV16iIiV16iUs", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq128_mask, "V2LLiV2LLiV2LLiIiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq256_mask, "V4LLiV4LLiV4LLiIiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq512_mask, "V8LLiV8LLiV8LLiIiV8LLiUc", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw128_mask, "V8sV8sV8sIiV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw256_mask, "V16sV16sV16sIiV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw512_mask, "V32sV32sV32sIiV32sUi", "ncV:512:", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtps2qq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtps2qq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtps2uqq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtps2uqq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtqq2ps128_mask, "V4fV2LLiV4fUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtqq2ps256_mask, "V4fV4LLiV4fUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttpd2qq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttpd2qq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttps2qq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttps2qq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttps2uqq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttps2uqq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps128_mask, "V4fV2LLiV4fUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps256_mask, "V4fV4LLiV4fUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangepd128_mask, "V2dV2dV2dIiV2dUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangepd256_mask, "V4dV4dV4dIiV4dUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangeps128_mask, "V4fV4fV4fIiV4fUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangeps256_mask, "V8fV8fV8fIiV8fUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangesd128_round_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangess128_round_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_reducepd128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_reducepd256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_reduceps128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_reduceps256_mask, "V8fV8fIiV8fUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_reducesd_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_reducess_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_pmovswb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovswb256_mask, "V16cV16sV16cUs", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovuswb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovuswb256_mask, "V16cV16sV16cUs", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovwb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtqq2pd512_mask, "V8dV8LLiV8dUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtqq2ps512_mask, "V8fV8LLiV8fUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttps2qq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd512_mask, "V8dV8LLiV8dUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps512_mask, "V8fV8LLiV8fUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangepd512_mask, "V8dV8dV8dIiV8dUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangeps512_mask, "V16fV16fV16fIiV16fUsIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_reducepd512_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_reduceps512_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_prold512_mask, "V16iV16iIiV16iUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prolq512_mask, "V8LLiV8LLiIiV8LLiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prold128_mask, "V4iV4iIiV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prold256_mask, "V8iV8iIiV8iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prolq128_mask, "V2LLiV2LLiIiV2LLiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prolq256_mask, "V4LLiV4LLiIiV4LLiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prolvd512_mask, "V16iV16iV16iV16iUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prolvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prord512_mask, "V16iV16iiV16iUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prorq512_mask, "V8LLiV8LLiiV8LLiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prolvd128_mask, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prolvd256_mask, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prolvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prolvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prord128_mask, "V4iV4iIiV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prord256_mask, "V8iV8iIiV8iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prorq128_mask, "V2LLiV2LLiIiV2LLiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prorq256_mask, "V4LLiV4LLiIiV4LLiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prorvd512_mask, "V16iV16iV16iV16iUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prorvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prorvd128_mask, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prorvd256_mask, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prorvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prorvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_psllv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psllw512, "V32sV32sV8s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psllwi512, "V32sV32si", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psllv16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_psllv8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_pslldi512, "V16iV16ii", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psllqi512, "V8LLiV8LLii", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrlv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrlv16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_psrlv8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_psrldi512, "V16iV16ii", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrlqi512, "V8LLiV8LLii", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrav32hi, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrav16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_psrav8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_psravq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_psravq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_psraw512, "V32sV32sV8s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_movdqa32store512_mask, "vV16i*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_movdqa64load512_mask, "V8LLiV8LLiC*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_movdqa64store512_mask, "vV8LLi*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_movdqa32store128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa32store256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2LLiV2LLiC*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512ifma") +TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512ifma") +TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512ifma,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512ifma,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512ifma,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512ifma,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_kunpckdi, "ULLiULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32s*V32sUi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64c*V64cULLi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8LLiIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8LLiIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmps512_mask, "V16fV16fV16fV16iIiUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmps512_maskz, "V16fV16fV16fV16iIiUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmsd_mask, "V2dV2dV2dV2LLiIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmsd_maskz, "V2dV2dV2dV2LLiIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmss_mask, "V4fV4fV4fV4iIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmss_maskz, "V4fV4fV4fV4iIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getexpsd128_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getexpss128_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getmantsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getmantss_round_mask, "V4fV4fV4fIiV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loaddquhi128_mask, "V8sV8s*V8sUc", "n", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddquhi256_mask, "V16sV16s*V16sUs", "n", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddquqi128_mask, "V16cV16c*V16cUs", "n", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddquqi256_mask, "V32cV32c*V32cUi", "n", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_mask, "V2dV2dV2dV2LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_maskz, "V2dV2dV2dV2LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_mask, "V4dV4dV4dV4LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_maskz, "V4dV4dV4dV4LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmps128_mask, "V4fV4fV4fV4iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmps128_maskz, "V4fV4fV4fV4iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V2dV2d*V2dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V4fV4f*V4fUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddqusi128_mask, "V4iV4i*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddqusi256_mask, "V8iV8i*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadupd128_mask, "V2dV2d*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4d*V4dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4f*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadups256_mask, "V8fV8f*V8fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storedquhi512_mask, "vV32s*V32sUi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_storedquqi512_mask, "vV64c*V64cULLi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_storedquhi128_mask, "vV8s*V8sUc", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_storedquhi256_mask, "vV16s*V16sUs", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV2d*V2dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV4f*V4fUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storedqusi128_mask, "vV4i*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storedqusi256_mask, "vV8i*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storeupd128_mask, "vV2d*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storeupd256_mask, "vV4d*V4dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storeups128_mask, "vV4f*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storeups256_mask, "vV8f*V8fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rcp14pd128_mask, "V2dV2dV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rcp14pd256_mask, "V4dV4dV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rcp14ps128_mask, "V4fV4fV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rcp14ps256_mask, "V8fV8fV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vplzcntd_128, "V4iV4i", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vplzcntd_256, "V8iV8i", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vplzcntq_128, "V2LLiV2LLi", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vplzcntq_256, "V4LLiV4LLi", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vcvtsd2si32, "iV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi32, "UiV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtss2si32, "iV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtss2usi32, "UiV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttsd2si32, "iV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi32, "UiV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttss2si32, "iV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttss2usi32, "UiV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512, "V8dV8dV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermilvarps512, "V16fV16fV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rndscalesd_round_mask, "V2dV2dV2dV2dUcIiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rndscaless_round_mask, "V4fV4fV4fV4fUcIiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scalefpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scalefps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scalefsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scalefss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psradi512, "V16iV16ii", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psraqi512, "V8LLiV8LLii", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psraq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_psraq256, "V4LLiV4LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_psraqi128, "V2LLiV2LLii", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_psraqi256, "V4LLiV4LLii", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pslld512, "V16iV16iV4i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psllq512, "V8LLiV8LLiV2LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psllv16si, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psllv8di, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrad512, "V16iV16iV4i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psraq512, "V8LLiV8LLiV2LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrav16si, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrav8di, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrld512, "V16iV16iV4i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrlq512, "V8LLiV8LLiV2LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrlv16si, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrlv8di, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pternlogd512_mask, "V16iV16iV16iV16iIiUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pternlogd512_maskz, "V16iV16iV16iV16iIiUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pternlogq512_mask, "V8LLiV8LLiV8LLiV8LLiIiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pternlogq512_maskz, "V8LLiV8LLiV8LLiV8LLiIiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pternlogd128_mask, "V4iV4iV4iV4iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogd128_maskz, "V4iV4iV4iV4iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogd256_mask, "V8iV8iV8iV8iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogd256_maskz, "V8iV8iV8iV8iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogq128_mask, "V2LLiV2LLiV2LLiV2LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogq128_maskz, "V2LLiV2LLiV2LLiV2LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogq256_mask, "V4LLiV4LLiV4LLiV4LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogq256_maskz, "V4LLiV4LLiV4LLiV4LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_sqrtsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_sqrtss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rsqrt14pd128_mask, "V2dV2dV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rsqrt14pd256_mask, "V4dV4dV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rsqrt14ps128_mask, "V4fV4fV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rsqrt14ps256_mask, "V8fV8fV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtb2mask512, "ULLiV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_cvtmask2b512, "V64cULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_cvtmask2w512, "V32sUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_cvtd2mask512, "UsV16i", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtmask2d512, "V16iUs", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtmask2q512, "V8LLiUc", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtq2mask512, "UcV8LLi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtb2mask128, "UsV16c", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtb2mask256, "UiV32c", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2b128, "V16cUs", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2b256, "V32cUi", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2w128, "V8sUc", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2w256, "V16sUs", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtd2mask128, "UcV4i", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtd2mask256, "UcV8i", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2d128, "V4iUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2d256, "V8iUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2q128, "V2LLiUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2q256, "V4LLiUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtq2mask128, "UcV2LLi", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4LLi", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovsdw512_mask, "V16sV16iV16sUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsdw512mem_mask, "vV16s*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsqd512_mask, "V8iV8LLiV8iUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsdb128_mask, "V16cV4iV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsdb128mem_mask, "vV16c*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovswb128mem_mask, "vV16c*V8sUc", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovsdb256_mask, "V16cV8iV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsdb256mem_mask, "vV16c*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovswb256mem_mask, "vV16c*V16sUs", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovsdw128_mask, "V8sV4iV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqb128_mask, "V16cV2LLiV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqb128mem_mask, "vV16c*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqb256_mask, "V16cV4LLiV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqb256mem_mask, "vV16c*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqd128_mask, "V4iV2LLiV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqd128mem_mask, "vV4i*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqd256_mask, "V4iV4LLiV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqd256mem_mask, "vV4i*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqw128_mask, "V8sV2LLiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusdb512_mask, "V16cV16iV16cUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusdb512mem_mask, "vV16c*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovuswb512mem_mask, "vV32c*V32sUi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovusdw512_mask, "V16sV16iV16sUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusdw512mem_mask, "vV16s*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusqd512_mask, "V8iV8LLiV8iUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusdb128_mask, "V16cV4iV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusdb128mem_mask, "vV16c*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovuswb128mem_mask, "vV16c*V8sUc", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovusdb256_mask, "V16cV8iV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusdb256mem_mask, "vV16c*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovuswb256mem_mask, "vV16c*V16sUs", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovusdw128_mask, "V8sV4iV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2LLiV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqb128mem_mask, "vV16c*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqb256_mask, "V16cV4LLiV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqb256mem_mask, "vV16c*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqd128_mask, "V4iV2LLiV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqd128mem_mask, "vV4i*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqd256_mask, "V4iV4LLiV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqd256mem_mask, "vV4i*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqw128_mask, "V8sV2LLiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovdb128mem_mask, "vV16c*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdb256_mask, "V16cV8iV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdb256mem_mask, "vV16c*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovwb256mem_mask, "vV16c*V16sUs", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2LLiV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, "vV16c*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqb256_mask, "V16cV4LLiV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqb256mem_mask, "vV16c*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqd128_mask, "V4iV2LLiV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqd128mem_mask, "vV4i*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqd256mem_mask, "vV4i*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2LLiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getmantpd128_mask, "V2dV2diV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getmantpd256_mask, "V4dV4diV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getmantps128_mask, "V4fV4fiV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getmantps256_mask, "V8fV8fiV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getmantpd512_mask, "V8dV8diV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getmantps512_mask, "V16fV16fiV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getexppd512_mask, "V8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getexpps512_mask, "V16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddss3_maskz, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask3, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_maskz, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask3, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfnmsubss3_mask3, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvarhi512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvarsf512, "V16fV16fV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvarsi512, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvarqi512, "V64cV64cV64c", "nc", "avx512vbmi") -TARGET_BUILTIN(__builtin_ia32_permvarqi128, "V16cV16cV16c", "nc", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvarqi256, "V32cV32cV32c", "nc", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvarhi128, "V8sV8sV8s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvarhi256, "V16sV16sV16s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvardf256, "V4dV4dV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvardi256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_fpclassps256_mask, "UcV8fIiUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32s*V32sUi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64c*V64cULLi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8LLiIiUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8LLiIiUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmps512_mask, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmps512_maskz, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmsd_mask, "V2dV2dV2dV2LLiIiUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmsd_maskz, "V2dV2dV2dV2LLiIiUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmss_mask, "V4fV4fV4fV4iIiUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmss_maskz, "V4fV4fV4fV4iIiUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getexpsd128_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getexpss128_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getmantsd_round_mask, "V2dV2dV2dIiV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getmantss_round_mask, "V4fV4fV4fIiV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loaddquhi128_mask, "V8sV8s*V8sUc", "nV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddquhi256_mask, "V16sV16s*V16sUs", "nV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddquqi128_mask, "V16cV16c*V16cUs", "nV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddquqi256_mask, "V32cV32c*V32cUi", "nV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_mask, "V2dV2dV2dV2LLiIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_maskz, "V2dV2dV2dV2LLiIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_mask, "V4dV4dV4dV4LLiIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_maskz, "V4dV4dV4dV4LLiIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmps128_mask, "V4fV4fV4fV4iIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmps128_maskz, "V4fV4fV4fV4iIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V2dV2d*V2dUc", "nV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V4fV4f*V4fUc", "nV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddqusi128_mask, "V4iV4i*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddqusi256_mask, "V8iV8i*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadupd128_mask, "V2dV2d*V2dUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4d*V4dUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4f*V4fUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadups256_mask, "V8fV8f*V8fUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storedquhi512_mask, "vV32s*V32sUi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_storedquqi512_mask, "vV64c*V64cULLi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_storedquhi128_mask, "vV8s*V8sUc", "nV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_storedquhi256_mask, "vV16s*V16sUs", "nV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs", "nV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi", "nV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV2d*V2dUc", "nV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV4f*V4fUc", "nV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storedqusi128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storedqusi256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storeupd128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storeupd256_mask, "vV4d*V4dUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storeups128_mask, "vV4f*V4fUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storeups256_mask, "vV8f*V8fUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rcp14pd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rcp14pd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rcp14ps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rcp14ps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vplzcntd_128, "V4iV4i", "ncV:128:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vplzcntd_256, "V8iV8i", "ncV:256:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vplzcntq_128, "V2LLiV2LLi", "ncV:128:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vplzcntq_256, "V4LLiV4LLi", "ncV:256:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtsd2si32, "iV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi32, "UiV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtss2si32, "iV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtss2usi32, "UiV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttsd2si32, "iV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi32, "UiV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttss2si32, "iV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttss2usi32, "UiV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512, "V8dV8dV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermilvarps512, "V16fV16fV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rndscalesd_round_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rndscaless_round_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scalefpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scalefps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scalefsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scalefss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psradi512, "V16iV16ii", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psraqi512, "V8LLiV8LLii", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psraq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_psraq256, "V4LLiV4LLiV2LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_psraqi128, "V2LLiV2LLii", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_psraqi256, "V4LLiV4LLii", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pslld512, "V16iV16iV4i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psllq512, "V8LLiV8LLiV2LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psllv16si, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psllv8di, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrad512, "V16iV16iV4i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psraq512, "V8LLiV8LLiV2LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrav16si, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrav8di, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrld512, "V16iV16iV4i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrlq512, "V8LLiV8LLiV2LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrlv16si, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrlv8di, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pternlogd512_mask, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pternlogd512_maskz, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pternlogq512_mask, "V8LLiV8LLiV8LLiV8LLiIiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pternlogq512_maskz, "V8LLiV8LLiV8LLiV8LLiIiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pternlogd128_mask, "V4iV4iV4iV4iIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogd128_maskz, "V4iV4iV4iV4iIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogd256_mask, "V8iV8iV8iV8iIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogd256_maskz, "V8iV8iV8iV8iIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogq128_mask, "V2LLiV2LLiV2LLiV2LLiIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogq128_maskz, "V2LLiV2LLiV2LLiV2LLiIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogq256_mask, "V4LLiV4LLiV4LLiV4LLiIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogq256_maskz, "V4LLiV4LLiV4LLiV4LLiIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_sqrtsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_sqrtss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rsqrt14pd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rsqrt14pd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rsqrt14ps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rsqrt14ps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtb2mask512, "ULLiV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_cvtmask2b512, "V64cULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_cvtmask2w512, "V32sUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_cvtd2mask512, "UsV16i", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtmask2d512, "V16iUs", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtmask2q512, "V8LLiUc", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtq2mask512, "UcV8LLi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtb2mask128, "UsV16c", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtb2mask256, "UiV32c", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2b128, "V16cUs", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2b256, "V32cUi", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2w128, "V8sUc", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2w256, "V16sUs", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtd2mask128, "UcV4i", "ncV:128:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtd2mask256, "UcV8i", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2d128, "V4iUc", "ncV:128:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2d256, "V8iUc", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2q128, "V2LLiUc", "ncV:128:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2q256, "V4LLiUc", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtq2mask128, "UcV2LLi", "ncV:128:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4LLi", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovsdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8LLiV16cUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqb512mem_mask, "vV16c*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqd512_mask, "V8iV8LLiV8iUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqd512mem_mask, "vV8i*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqw512_mask, "V8sV8LLiV8sUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovswb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovsdb256_mask, "V16cV8iV16cUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdb256mem_mask, "vV16c*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovswb256mem_mask, "vV16c*V16sUs", "nV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovsdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqb128_mask, "V16cV2LLiV16cUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqb128mem_mask, "vV16c*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqb256_mask, "V16cV4LLiV16cUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqb256mem_mask, "vV16c*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqd128_mask, "V4iV2LLiV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqd128mem_mask, "vV4i*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqd256_mask, "V4iV4LLiV4iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqd256mem_mask, "vV4i*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqw128_mask, "V8sV2LLiV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqw128mem_mask, "vV8s*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqw256_mask, "V8sV4LLiV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqw256mem_mask, "vV8s*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovuswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovusdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8LLiV16cUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqb512mem_mask, "vV16c*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqd512_mask, "V8iV8LLiV8iUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqd512mem_mask, "vV8i*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqw512_mask, "V8sV8LLiV8sUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovuswb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovusdb256_mask, "V16cV8iV16cUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdb256mem_mask, "vV16c*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovuswb256mem_mask, "vV16c*V16sUs", "nV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovusdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2LLiV16cUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqb128mem_mask, "vV16c*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqb256_mask, "V16cV4LLiV16cUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqb256mem_mask, "vV16c*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqd128_mask, "V4iV2LLiV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqd128mem_mask, "vV4i*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqd256_mask, "V4iV4LLiV4iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqd256mem_mask, "vV4i*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqw128_mask, "V8sV2LLiV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4LLiV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdb256_mask, "V16cV8iV16cUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdb256mem_mask, "vV16c*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovwb256mem_mask, "vV16c*V16sUs", "nV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2LLiV16cUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, "vV16c*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqb256_mask, "V16cV4LLiV16cUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqb256mem_mask, "vV16c*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqd128_mask, "V4iV2LLiV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqd128mem_mask, "vV4i*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqd256mem_mask, "vV4i*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2LLiV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantpd128_mask, "V2dV2diV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantpd256_mask, "V4dV4diV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantps128_mask, "V4fV4fiV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantps256_mask, "V8fV8fiV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantpd512_mask, "V8dV8diV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getmantps512_mask, "V16fV16fiV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getexppd512_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getexpps512_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddss3_maskz, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_maskz, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfnmsubss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvarhi512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvarsf512, "V16fV16fV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvarsi512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvarqi512, "V64cV64cV64c", "ncV:512:", "avx512vbmi") +TARGET_BUILTIN(__builtin_ia32_permvarqi128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvarqi256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvarhi128, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvarhi256, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvardf256, "V4dV4dV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvardi256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc", "ncV:128:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc", "ncV:128:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_fpclassps256_mask, "UcV8fIiUc", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f") @@ -1650,72 +1652,72 @@ TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_dbpsadbw128_mask, "V8sV16cV16cIiV8sUc", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_dbpsadbw256_mask, "V16sV32cV32cIiV16sUs", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_dbpsadbw512_mask, "V32sV64cV64cIiV32sUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8LLiV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8LLiV8LLiV8LLiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cULLi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cULLi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8LLiV8LLiC*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi", "n", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cULLi", "n", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi", "n", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cULLi", "n", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtph2ps_mask, "V4fV8sV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256_mask, "V8fV8sV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vcvtps2ph_mask, "V8sV4fIiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256_mask, "V8sV8fIiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtw2mask512, "UiV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_cvtw2mask128, "UcV8s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtw2mask256, "UsV16s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512vbmi") -TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs", "nc", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256_mask, "V32cV32cV32cV32cUi", "nc", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_dbpsadbw128_mask, "V8sV16cV16cIiV8sUc", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_dbpsadbw256_mask, "V16sV32cV32cIiV16sUs", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_dbpsadbw512_mask, "V32sV64cV64cIiV32sUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8LLiV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8LLiV8LLiV8LLiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cULLi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cULLi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8LLiV8LLiC*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cULLi", "nV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi", "nV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cULLi", "nV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtph2ps_mask, "V4fV8sV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256_mask, "V8fV8sV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtps2ph_mask, "V8sV4fIiV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256_mask, "V8sV8fIiV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtw2mask512, "UiV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_cvtw2mask128, "UcV8s", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtw2mask256, "UsV16s", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512vbmi") +TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs", "ncV:128:", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256_mask, "V32cV32cV32cV32cUi", "ncV:256:", "avx512vbmi,avx512vl") // generic select intrinsics -TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectb_512, "V64cULLiV64cV64c", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectw_128, "V8sUcV8sV8s", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectw_256, "V16sUsV16sV16s", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectw_512, "V32sUiV32sV32s", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectd_128, "V4iUcV4iV4i", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectd_256, "V8iUcV8iV8i", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectd_512, "V16iUsV16iV16i", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectq_128, "V2LLiUcV2LLiV2LLi", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectq_256, "V4LLiUcV4LLiV4LLi", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectq_512, "V8LLiUcV8LLiV8LLi", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectps_128, "V4fUcV4fV4f", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectps_256, "V8fUcV8fV8f", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectps_512, "V16fUsV16fV16f", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectpd_128, "V2dUcV2dV2d", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectpd_256, "V4dUcV4dV4d", "nc", "") -TARGET_BUILTIN(__builtin_ia32_selectpd_512, "V8dUcV8dV8d", "nc", "") +TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "") +TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "ncV:256:", "") +TARGET_BUILTIN(__builtin_ia32_selectb_512, "V64cULLiV64cV64c", "ncV:512:", "") +TARGET_BUILTIN(__builtin_ia32_selectw_128, "V8sUcV8sV8s", "ncV:128:", "") +TARGET_BUILTIN(__builtin_ia32_selectw_256, "V16sUsV16sV16s", "ncV:256:", "") +TARGET_BUILTIN(__builtin_ia32_selectw_512, "V32sUiV32sV32s", "ncV:512:", "") +TARGET_BUILTIN(__builtin_ia32_selectd_128, "V4iUcV4iV4i", "ncV:128:", "") +TARGET_BUILTIN(__builtin_ia32_selectd_256, "V8iUcV8iV8i", "ncV:256:", "") +TARGET_BUILTIN(__builtin_ia32_selectd_512, "V16iUsV16iV16i", "ncV:512:", "") +TARGET_BUILTIN(__builtin_ia32_selectq_128, "V2LLiUcV2LLiV2LLi", "ncV:128:", "") +TARGET_BUILTIN(__builtin_ia32_selectq_256, "V4LLiUcV4LLiV4LLi", "ncV:256:", "") +TARGET_BUILTIN(__builtin_ia32_selectq_512, "V8LLiUcV8LLiV8LLi", "ncV:512:", "") +TARGET_BUILTIN(__builtin_ia32_selectps_128, "V4fUcV4fV4f", "ncV:128:", "") +TARGET_BUILTIN(__builtin_ia32_selectps_256, "V8fUcV8fV8f", "ncV:256:", "") +TARGET_BUILTIN(__builtin_ia32_selectps_512, "V16fUsV16fV16f", "ncV:512:", "") +TARGET_BUILTIN(__builtin_ia32_selectpd_128, "V2dUcV2dV2d", "ncV:128:", "") +TARGET_BUILTIN(__builtin_ia32_selectpd_256, "V4dUcV4dV4d", "ncV:256:", "") +TARGET_BUILTIN(__builtin_ia32_selectpd_512, "V8dUcV8dV8d", "ncV:512:", "") // MONITORX/MWAITX TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "n", "mwaitx") Index: include/clang/Basic/BuiltinsX86_64.def =================================================================== --- include/clang/Basic/BuiltinsX86_64.def +++ include/clang/Basic/BuiltinsX86_64.def @@ -44,10 +44,10 @@ TARGET_BUILTIN(__builtin_ia32_readeflags_u64, "ULLi", "n", "") TARGET_BUILTIN(__builtin_ia32_writeeflags_u64, "vULLi", "n", "") -TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_movnti64, "vLLi*LLi", "n", "sse2") TARGET_BUILTIN(__builtin_ia32_crc32di, "ULLiULLiULLi", "nc", "sse4.2") TARGET_BUILTIN(__builtin_ia32_rdfsbase32, "Ui", "n", "fsgsbase") @@ -82,18 +82,18 @@ TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "nc", "tbm") TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiUi", "n", "lwp") TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiUi", "n", "lwp") -TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtss2usi64, "ULLiV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttsd2si64, "LLiV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi64, "ULLiV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttss2si64, "LLiV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttss2usi64, "ULLiV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dLLiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fLLiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dULLiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fULLiIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtss2usi64, "ULLiV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttsd2si64, "LLiV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi64, "ULLiV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttss2si64, "LLiV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttss2usi64, "ULLiV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dLLiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fLLiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dULLiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fULLiIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri") TARGET_BUILTIN(__builtin_ia32_ptwrite64, "vULLi", "n", "ptwrite") Index: lib/Basic/Builtins.cpp =================================================================== --- lib/Basic/Builtins.cpp +++ lib/Basic/Builtins.cpp @@ -107,6 +107,21 @@ Table.get(getRecord(ID).Name).setBuiltinID(0); } +unsigned Builtin::Context::getRequiredVectorWidth(unsigned ID) const { + const char *Width = ::strchr(getRecord(ID).Attributes, 'V'); + if (!Width) + return 0; + + ++Width; + assert(*Width == ':' && "Vector width specifier must be followed by a ':'"); + ++Width; + + + assert(::strchr(Width, ':') && + "Vector width specifier must be end with a ':'"); + return ::strtol(Width, nullptr, 10); +} + bool Builtin::Context::isLike(unsigned ID, unsigned &FormatIdx, bool &HasVAListArg, const char *Fmt) const { assert(Fmt && "Not passed a format string"); Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -3483,6 +3483,9 @@ // can move this up to the beginning of the function. checkTargetFeatures(E, FD); + if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID)) + LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth); + // See if we have a target specific intrinsic. const char *Name = getContext().BuiltinInfo.getName(BuiltinID); Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -1463,6 +1463,10 @@ /// Terminate funclets keyed by parent funclet pad. llvm::MapVector TerminateFunclets; + /// Largest vector with used in ths function. Will be used to create a + /// function attribute. + unsigned LargestVectorWidth; + /// True if we need emit the life-time markers. const bool ShouldEmitLifetimeMarkers; Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -81,7 +81,7 @@ CXXStructorImplicitParamDecl(nullptr), CXXStructorImplicitParamValue(nullptr), OutermostConditional(nullptr), CurLexicalScope(nullptr), TerminateLandingPad(nullptr), - TerminateHandler(nullptr), TrapBB(nullptr), + TerminateHandler(nullptr), TrapBB(nullptr), LargestVectorWidth(0), ShouldEmitLifetimeMarkers( shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), CGM.getLangOpts())) { if (!suppressNewContext) @@ -445,6 +445,11 @@ cast(NormalCleanupDest.getPointer()), DT); NormalCleanupDest = Address::invalid(); } + + // Add the required-vector-width attribute + if (LargestVectorWidth != 0) + CurFn->addFnAttr("min-legal-vector-width", + llvm::utostr(LargestVectorWidth)); } /// ShouldInstrumentFunction - Return true if the current function should be @@ -1186,6 +1191,12 @@ // Emit a location at the end of the prologue. if (CGDebugInfo *DI = getDebugInfo()) DI->EmitLocation(Builder, StartLoc); + + // TODO: Do we need to handle this in two places like we do with + // target-features/target-cpu? + if (CurFuncDecl) + if (auto *VecWidth = CurFuncDecl->getAttr()) + LargestVectorWidth = VecWidth->getVectorWidth(); } void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args, Index: lib/Headers/__wmmintrin_aes.h =================================================================== --- lib/Headers/__wmmintrin_aes.h +++ lib/Headers/__wmmintrin_aes.h @@ -29,7 +29,7 @@ #define __WMMINTRIN_AES_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"), __min_vector_width__(128))) /// Performs a single round of AES encryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source Index: lib/Headers/ammintrin.h =================================================================== --- lib/Headers/ammintrin.h +++ lib/Headers/ammintrin.h @@ -27,7 +27,7 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128))) /// Extracts the specified bits from the lower 64 bits of the 128-bit /// integer vector operand at the index \a idx and of the length \a len. Index: lib/Headers/avx2intrin.h =================================================================== --- lib/Headers/avx2intrin.h +++ lib/Headers/avx2intrin.h @@ -29,98 +29,99 @@ #define __AVX2INTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx2"))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(128))) /* SSE4 Multiple Packed Sums of Absolute Difference. */ #define _mm256_mpsadbw_epu8(X, Y, M) \ (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \ (__v32qi)(__m256i)(Y), (int)(M)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi8(__m256i __a) { return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi16(__m256i __a) { return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a) { return (__m256i)__builtin_ia32_pabsd256((__v8si)__a); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi32(__m256i __V1, __m256i __V2) { return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a + (__v32qu)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a + (__v16hu)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a + (__v8su)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a + (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b); @@ -130,19 +131,19 @@ (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (n)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a & (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_si256(__m256i __a, __m256i __b) { return (__m256i)(~(__v4du)__a & (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu8(__m256i __a, __m256i __b) { typedef unsigned short __v32hu __attribute__((__vector_size__(64))); @@ -152,7 +153,7 @@ >> 1, __v32qu); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu16(__m256i __a, __m256i __b) { typedef unsigned int __v16su __attribute__((__vector_size__(64))); @@ -162,7 +163,7 @@ >> 1, __v16hu); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) { return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2, @@ -189,31 +190,31 @@ (((M) & 0x40) ? 30 : 14), \ (((M) & 0x80) ? 31 : 15)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qi)__a == (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a == (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a == (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4di)__a == (__v4di)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi8(__m256i __a, __m256i __b) { /* This function always performs a signed comparison, but __v32qi is a char @@ -221,151 +222,151 @@ return (__m256i)((__v32qs)__a > (__v32qs)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a > (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a > (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4di)__a > (__v4di)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadd_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadd_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadds_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hsub_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hsub_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hsubs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maddubs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS256 _mm256_movemask_epi8(__m256i __a) { return __builtin_ia32_pmovmskb256((__v32qi)__a); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi16(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char @@ -373,7 +374,7 @@ return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi32(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char @@ -381,7 +382,7 @@ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi64(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char @@ -389,115 +390,115 @@ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi32(__m128i __V) { return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi64(__m128i __V) { return (__m256i)__builtin_convertvector((__v4si)__V, __v4di); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi16(__m128i __V) { return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi32(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi32(__m128i __V) { return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_epi64(__m128i __V) { return (__m256i)__builtin_convertvector((__v4su)__V, __v4di); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mul_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhrs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhi_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a * (__v16hu)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi32 (__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a * (__v8su)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mul_epu32(__m256i __a, __m256i __b) { return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a | (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sad_epu8(__m256i __a, __m256i __b) { return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shuffle_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b); @@ -543,19 +544,19 @@ 8 + (((imm) >> 6) & 0x3), \ 12, 13, 14, 15) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); @@ -600,61 +601,61 @@ #define _mm256_bslli_epi128(a, count) _mm256_slli_si256((a), (count)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi64(__m256i __a, int __count) { return __builtin_ia32_psllqi256((__v4di)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psllq256((__v4di)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count); @@ -699,176 +700,176 @@ #define _mm256_bsrli_epi128(a, count) _mm256_srli_si256((a), (count)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi64(__m256i __a, int __count) { return __builtin_ia32_psrlqi256((__v4di)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psrlq256((__v4di)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a - (__v32qu)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a - (__v16hu)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a - (__v8su)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a - (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a ^ (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_stream_load_si256(__m256i const *__V) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_broadcastss_ps(__m128 __X) { return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_broadcastsd_pd(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcastss_ps(__m128 __X) { return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcastsd_pd(__m128d __X) { return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastsi128_si256(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1); @@ -894,56 +895,56 @@ (((M) & 0x40) ? 14 : 6), \ (((M) & 0x80) ? 15 : 7)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastb_epi8(__m128i __X) { return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastw_epi16(__m128i __X) { return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastd_epi32(__m128i __X) { return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastq_epi64(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastb_epi8(__m128i __X) { return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastw_epi16(__m128i __X) { return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastd_epi32(__m128i __X) { return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastq_epi64(__m128i __X) { return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); @@ -957,7 +958,7 @@ ((M) >> 4) & 0x3, \ ((M) >> 6) & 0x3) -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) { return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b); @@ -988,109 +989,109 @@ (((M) & 1) ? 4 : 2), \ (((M) & 1) ? 5 : 3) ) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskload_epi32(int const *__X, __m256i __M) { return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskload_epi64(long long const *__X, __m256i __M) { return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskload_epi32(int const *__X, __m128i __M) { return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskload_epi64(long long const *__X, __m128i __M) { return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y) { __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y) { __builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y) { __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) { __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y); @@ -1303,6 +1304,7 @@ (__v4di)(__m256i)(i), \ (__v4di)_mm256_set1_epi64x(-1), (s)) -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128 #endif /* __AVX2INTRIN_H */ Index: lib/Headers/avx512bitalgintrin.h =================================================================== --- lib/Headers/avx512bitalgintrin.h +++ lib/Headers/avx512bitalgintrin.h @@ -29,7 +29,7 @@ #define __AVX512BITALGINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi16(__m512i __A) Index: lib/Headers/avx512bwintrin.h =================================================================== --- lib/Headers/avx512bwintrin.h +++ lib/Headers/avx512bwintrin.h @@ -32,7 +32,7 @@ typedef unsigned long long __mmask64; /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) /* Integer compare */ Index: lib/Headers/avx512cdintrin.h =================================================================== --- lib/Headers/avx512cdintrin.h +++ lib/Headers/avx512cdintrin.h @@ -29,7 +29,7 @@ #define __AVX512CDINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_conflict_epi64 (__m512i __A) Index: lib/Headers/avx512dqintrin.h =================================================================== --- lib/Headers/avx512dqintrin.h +++ lib/Headers/avx512dqintrin.h @@ -29,7 +29,7 @@ #define __AVX512DQINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mullo_epi64 (__m512i __A, __m512i __B) { Index: lib/Headers/avx512fintrin.h =================================================================== --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -173,7 +173,8 @@ } _MM_MANTISSA_SIGN_ENUM; /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128))) /* Create vectors with repeated elements */ @@ -1044,7 +1045,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -1053,7 +1054,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -1080,7 +1081,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -1089,7 +1090,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -1305,7 +1306,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -1314,7 +1315,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -1341,7 +1342,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -1350,7 +1351,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -1680,7 +1681,7 @@ (__mmask16) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, @@ -1690,7 +1691,7 @@ (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, @@ -1699,7 +1700,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, @@ -1708,7 +1709,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, @@ -1718,7 +1719,7 @@ (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, @@ -1727,7 +1728,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, @@ -1788,7 +1789,7 @@ (__mmask16) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, @@ -1798,7 +1799,7 @@ (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, @@ -1807,7 +1808,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, @@ -1816,7 +1817,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, @@ -1826,7 +1827,7 @@ (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, @@ -1835,7 +1836,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, @@ -1960,7 +1961,7 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -1969,7 +1970,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_addss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -1996,7 +1997,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -2005,7 +2006,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_addsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -2095,7 +2096,7 @@ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -2104,7 +2105,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_subss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -2130,7 +2131,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -2139,7 +2140,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_subsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -2230,7 +2231,7 @@ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -2239,7 +2240,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_mulss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -2265,7 +2266,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -2274,7 +2275,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_mulsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -2365,7 +2366,7 @@ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -2374,7 +2375,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -2401,7 +2402,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -2410,7 +2411,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -5747,14 +5748,14 @@ (__mmask8)-1, (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, @@ -5770,7 +5771,7 @@ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, @@ -5792,14 +5793,14 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, @@ -5815,7 +5816,7 @@ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, @@ -6254,7 +6255,7 @@ #define _mm_cvt_roundsd_u32(A, R) \ (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)) -static __inline__ unsigned __DEFAULT_FN_ATTRS +static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32 (__m128d __A) { return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, @@ -6266,7 +6267,7 @@ (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ (int)(R)) -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvtsd_u64 (__m128d __A) { return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) @@ -6292,7 +6293,7 @@ #define _mm_cvt_roundss_u32(A, R) \ (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)) -static __inline__ unsigned __DEFAULT_FN_ATTRS +static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32 (__m128 __A) { return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, @@ -6304,7 +6305,7 @@ (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ (int)(R)) -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvtss_u64 (__m128 __A) { return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) @@ -6319,7 +6320,7 @@ #define _mm_cvtt_roundsd_si32(A, R) \ (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32 (__m128d __A) { return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, @@ -6333,7 +6334,7 @@ #define _mm_cvtt_roundsd_i64(A, R) \ (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsd_i64 (__m128d __A) { return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, @@ -6344,7 +6345,7 @@ #define _mm_cvtt_roundsd_u32(A, R) \ (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)) -static __inline__ unsigned __DEFAULT_FN_ATTRS +static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32 (__m128d __A) { return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, @@ -6356,7 +6357,7 @@ (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ (int)(R)) -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvttsd_u64 (__m128d __A) { return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) @@ -6371,7 +6372,7 @@ #define _mm_cvtt_roundss_si32(A, R) \ (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32 (__m128 __A) { return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, @@ -6385,7 +6386,7 @@ #define _mm_cvtt_roundss_si64(A, R) \ (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttss_i64 (__m128 __A) { return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, @@ -6396,7 +6397,7 @@ #define _mm_cvtt_roundss_u32(A, R) \ (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)) -static __inline__ unsigned __DEFAULT_FN_ATTRS +static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32 (__m128 __A) { return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, @@ -6408,7 +6409,7 @@ (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ (int)(R)) -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvttss_u64 (__m128 __A) { return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) @@ -6812,7 +6813,7 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A, @@ -6821,7 +6822,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, @@ -6837,7 +6838,7 @@ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, @@ -6859,7 +6860,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A, @@ -6868,7 +6869,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, @@ -6884,7 +6885,7 @@ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, @@ -7098,7 +7099,7 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, @@ -7114,7 +7115,7 @@ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, @@ -7136,7 +7137,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, @@ -7152,7 +7153,7 @@ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, @@ -8203,7 +8204,7 @@ (__v8si)(__m256i)(index), \ (__v8di)(__m512i)(v1), (int)(scale)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -8219,7 +8220,7 @@ (__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A, @@ -8235,7 +8236,7 @@ (__v4sf)(__m128)(C), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -8251,7 +8252,7 @@ (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -8267,7 +8268,7 @@ -(__v4sf)(__m128)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A, @@ -8283,7 +8284,7 @@ -(__v4sf)(__m128)(C), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -8299,7 +8300,7 @@ (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -8315,7 +8316,7 @@ (__v4sf)(__m128)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A, @@ -8331,7 +8332,7 @@ (__v4sf)(__m128)(C), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -8347,7 +8348,7 @@ (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -8363,7 +8364,7 @@ -(__v4sf)(__m128)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A, @@ -8379,7 +8380,7 @@ -(__v4sf)(__m128)(C), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -8395,7 +8396,7 @@ (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8411,7 +8412,7 @@ (__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A, @@ -8427,7 +8428,7 @@ (__v2df)(__m128d)(C), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8443,7 +8444,7 @@ (__v2df)(__m128d)(Y), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8459,7 +8460,7 @@ -(__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A, @@ -8475,7 +8476,7 @@ -(__v2df)(__m128d)(C), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8491,7 +8492,7 @@ (__v2df)(__m128d)(Y), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8507,7 +8508,7 @@ (__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A, @@ -8523,7 +8524,7 @@ (__v2df)(__m128d)(C), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8539,7 +8540,7 @@ (__v2df)(__m128d)(Y), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8555,7 +8556,7 @@ -(__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A, @@ -8572,7 +8573,7 @@ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -9018,7 +9019,7 @@ (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __m128 res = __A; @@ -9026,7 +9027,7 @@ return res; } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) { __m128 res = __A; @@ -9034,7 +9035,7 @@ return res; } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __m128d res = __A; @@ -9042,7 +9043,7 @@ return res; } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) { __m128d res = __A; @@ -9050,19 +9051,19 @@ return res; } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) { __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) { __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) { __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W, @@ -9072,7 +9073,7 @@ return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss (__mmask8 __U, const float* __A) { return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A, @@ -9080,7 +9081,7 @@ __U & 1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) { __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W, @@ -9090,7 +9091,7 @@ return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd (__mmask8 __U, const double* __A) { return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, @@ -9383,7 +9384,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) { return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, @@ -9392,7 +9393,7 @@ (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) { return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, @@ -9456,7 +9457,7 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) { return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, @@ -9465,7 +9466,7 @@ (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B) { return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, @@ -9474,7 +9475,7 @@ (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd (__m128d __A, unsigned __B) { __A[0] = __B; @@ -9486,7 +9487,7 @@ (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ (unsigned long long)(B), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu64_sd (__m128d __A, unsigned long long __B) { __A[0] = __B; @@ -9498,7 +9499,7 @@ (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss (__m128 __A, unsigned __B) { __A[0] = __B; @@ -9510,7 +9511,7 @@ (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ (unsigned long long)(B), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu64_ss (__m128 __A, unsigned long long __B) { __A[0] = __B; @@ -10194,5 +10195,6 @@ #undef _mm512_mask_reduce_maxMin_32bit #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 #endif /* __AVX512FINTRIN_H */ Index: lib/Headers/avx512ifmaintrin.h =================================================================== --- lib/Headers/avx512ifmaintrin.h +++ lib/Headers/avx512ifmaintrin.h @@ -29,7 +29,7 @@ #define __IFMAINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z) Index: lib/Headers/avx512ifmavlintrin.h =================================================================== --- lib/Headers/avx512ifmavlintrin.h +++ lib/Headers/avx512ifmavlintrin.h @@ -29,18 +29,19 @@ #define __IFMAVLINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256))) -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di) __X, (__v2di) __Y, (__v2di) __Z); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -48,7 +49,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -56,14 +57,14 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y, (__v4di)__Z); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -71,7 +72,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -79,14 +80,14 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y, (__v2di)__Z); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -94,7 +95,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -102,14 +103,14 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y, (__v4di)__Z); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -117,7 +118,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -126,6 +127,7 @@ } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avx512vbmi2intrin.h =================================================================== --- lib/Headers/avx512vbmi2intrin.h +++ lib/Headers/avx512vbmi2intrin.h @@ -29,7 +29,7 @@ #define __AVX512VBMI2INTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS Index: lib/Headers/avx512vbmiintrin.h =================================================================== --- lib/Headers/avx512vbmiintrin.h +++ lib/Headers/avx512vbmiintrin.h @@ -29,7 +29,7 @@ #define __VBMIINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS Index: lib/Headers/avx512vbmivlintrin.h =================================================================== --- lib/Headers/avx512vbmivlintrin.h +++ lib/Headers/avx512vbmivlintrin.h @@ -29,10 +29,11 @@ #define __VBMIVLINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256))) -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A, @@ -40,7 +41,7 @@ (__v16qi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I, __m128i __B) { @@ -49,7 +50,7 @@ (__v16qi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U, __m128i __B) { @@ -58,7 +59,7 @@ (__v16qi)__I); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I, __m128i __B) { @@ -67,14 +68,14 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I, (__v32qi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I, __m256i __B) { @@ -83,7 +84,7 @@ (__v32qi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U, __m256i __B) { @@ -92,7 +93,7 @@ (__v32qi)__I); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, __m256i __B) { @@ -101,13 +102,13 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -115,7 +116,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { @@ -124,13 +125,13 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, __m256i __B) { @@ -139,7 +140,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { @@ -148,7 +149,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, @@ -157,7 +158,7 @@ (__mmask16) __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, @@ -167,7 +168,7 @@ (__mmask16) __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, @@ -177,7 +178,7 @@ (__mmask16) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, @@ -186,7 +187,7 @@ (__mmask32) __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, @@ -196,7 +197,7 @@ (__mmask32) __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, @@ -207,6 +208,7 @@ } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avx512vlbitalgintrin.h =================================================================== --- lib/Headers/avx512vlbitalgintrin.h +++ lib/Headers/avx512vlbitalgintrin.h @@ -29,15 +29,16 @@ #define __AVX512VLBITALGINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(256))) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi16(__m256i __A) { return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) { return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U, @@ -45,7 +46,7 @@ (__v16hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) { return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(), @@ -53,13 +54,13 @@ __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi16(__m128i __A) { return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, @@ -67,7 +68,7 @@ (__v8hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) { return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), @@ -75,13 +76,13 @@ __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi8(__m256i __A) { return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) { return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U, @@ -89,7 +90,7 @@ (__v32qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) { return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(), @@ -97,13 +98,13 @@ __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi8(__m128i __A) { return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U, @@ -111,7 +112,7 @@ (__v16qi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) { return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), @@ -119,7 +120,7 @@ __B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_bitshuffle_epi64_mask(__mmask32 __U, __m256i __A, __m256i __B) { return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A, @@ -127,7 +128,7 @@ __U); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) { return _mm256_mask_bitshuffle_epi64_mask((__mmask32) -1, @@ -135,7 +136,7 @@ __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B) { return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A, @@ -143,7 +144,7 @@ __U); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) { return _mm_mask_bitshuffle_epi64_mask((__mmask16) -1, @@ -152,6 +153,7 @@ } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avx512vlbwintrin.h =================================================================== --- lib/Headers/avx512vlbwintrin.h +++ lib/Headers/avx512vlbwintrin.h @@ -29,7 +29,8 @@ #define __AVX512VLBWINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256))) /* Integer compare */ @@ -313,147 +314,147 @@ #define _mm256_mask_cmpneq_epu16_mask(k, A, B) \ _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){ return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_add_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_add_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_add_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_add_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_sub_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_sub_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sub_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sub_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_add_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_add_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_add_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_add_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_sub_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_sub_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sub_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sub_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mullo_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mullo_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mullo_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mullo_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, @@ -461,7 +462,7 @@ (__v16qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, @@ -469,7 +470,7 @@ (__v32qi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, @@ -477,7 +478,7 @@ (__v8hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, @@ -485,7 +486,7 @@ (__v16hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -493,7 +494,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -501,7 +502,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -509,7 +510,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -517,7 +518,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -525,7 +526,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -533,7 +534,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -541,7 +542,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -549,14 +550,14 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_packs_epi32(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi32(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -564,7 +565,7 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -572,7 +573,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -580,7 +581,7 @@ (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -588,7 +589,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -596,7 +597,7 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -604,7 +605,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -612,7 +613,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -620,7 +621,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi32(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -628,7 +629,7 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -636,7 +637,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -644,7 +645,7 @@ (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -652,7 +653,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -660,7 +661,7 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -668,7 +669,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -676,7 +677,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -684,7 +685,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -692,7 +693,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -700,7 +701,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -708,7 +709,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -716,7 +717,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -724,7 +725,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -732,7 +733,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -740,7 +741,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -748,7 +749,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -756,7 +757,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -764,7 +765,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -772,7 +773,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -780,7 +781,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -788,7 +789,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -796,7 +797,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -804,7 +805,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -812,7 +813,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -820,7 +821,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -828,7 +829,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -836,7 +837,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -844,7 +845,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -852,7 +853,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -860,7 +861,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -868,7 +869,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -876,7 +877,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -884,7 +885,7 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -892,7 +893,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -900,7 +901,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -908,7 +909,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -916,7 +917,7 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -924,7 +925,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -932,7 +933,7 @@ (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -940,7 +941,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -948,7 +949,7 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -956,7 +957,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -964,7 +965,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -972,7 +973,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -980,7 +981,7 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -988,7 +989,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -996,7 +997,7 @@ (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -1004,7 +1005,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -1012,7 +1013,7 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -1020,7 +1021,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -1028,7 +1029,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -1036,7 +1037,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -1044,7 +1045,7 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -1052,7 +1053,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -1060,7 +1061,7 @@ (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -1068,7 +1069,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -1076,7 +1077,7 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -1084,7 +1085,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -1092,7 +1093,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -1100,7 +1101,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -1108,7 +1109,7 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -1116,7 +1117,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -1124,7 +1125,7 @@ (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1132,7 +1133,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1140,7 +1141,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1148,7 +1149,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1156,7 +1157,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1164,7 +1165,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1172,7 +1173,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1180,7 +1181,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1188,7 +1189,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1196,7 +1197,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1204,7 +1205,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1212,7 +1213,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1220,7 +1221,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1228,7 +1229,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1236,7 +1237,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1244,7 +1245,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1252,7 +1253,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1260,7 +1261,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1268,7 +1269,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1276,7 +1277,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1284,14 +1285,14 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, (__v8hi) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { @@ -1300,7 +1301,7 @@ (__v8hi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { @@ -1309,7 +1310,7 @@ (__v8hi)__I); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { @@ -1318,14 +1319,14 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, __m256i __B) { @@ -1334,7 +1335,7 @@ (__v16hi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, __m256i __B) { @@ -1343,7 +1344,7 @@ (__v16hi)__I); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I, __m256i __B) { @@ -1352,21 +1353,21 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_maddubs_epi16(__X, __Y), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_maddubs_epi16(__X, __Y), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1374,126 +1375,126 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_maddubs_epi16(__X, __Y), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_madd_epi16(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_madd_epi16(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_madd_epi16(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_madd_epi16(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi16_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi16_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi16_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi16_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi16_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, @@ -1501,273 +1502,273 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi8 (__m256i __A) { return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm256_cvtepi16_epi8(__A), (__v16qi)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm256_cvtepi16_epi8(__A), (__v16qi)_mm_setzero_si128()); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epu16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpackhi_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpackhi_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpackhi_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpackhi_epi16(__A, __B), (__v8hi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpackhi_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpackhi_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpacklo_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpacklo_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpacklo_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpacklo_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpacklo_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpacklo_epi16(__A, __B), (__v8hi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpacklo_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpacklo_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1775,7 +1776,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1783,7 +1784,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1791,7 +1792,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1800,7 +1801,7 @@ } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1808,7 +1809,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1816,7 +1817,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1824,7 +1825,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1875,13 +1876,13 @@ (imm)), \ (__v16hi)_mm256_setzero_si256()) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1889,7 +1890,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1897,13 +1898,13 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1911,7 +1912,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1919,7 +1920,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1927,7 +1928,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1935,7 +1936,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1943,7 +1944,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1951,7 +1952,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1959,7 +1960,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1967,7 +1968,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1975,7 +1976,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1983,13 +1984,13 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1997,7 +1998,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2005,13 +2006,13 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2019,7 +2020,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2027,13 +2028,13 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2041,7 +2042,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2049,13 +2050,13 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2063,7 +2064,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2071,7 +2072,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2079,7 +2080,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2087,7 +2088,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2095,7 +2096,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2103,7 +2104,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2111,7 +2112,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2119,7 +2120,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2127,7 +2128,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2135,7 +2136,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2143,7 +2144,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2151,7 +2152,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2159,7 +2160,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2167,7 +2168,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2175,7 +2176,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2183,7 +2184,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2191,7 +2192,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2199,7 +2200,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, @@ -2207,7 +2208,7 @@ (__v8hi) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, @@ -2215,7 +2216,7 @@ (__v8hi) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, @@ -2223,7 +2224,7 @@ (__v16hi) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, @@ -2231,7 +2232,7 @@ (__v16hi) _mm256_setzero_si256 ()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, @@ -2239,7 +2240,7 @@ (__v16qi) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, @@ -2247,7 +2248,7 @@ (__v16qi) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, @@ -2255,7 +2256,7 @@ (__v32qi) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, @@ -2264,7 +2265,7 @@ } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A) { return (__m128i) __builtin_ia32_selectb_128(__M, @@ -2272,7 +2273,7 @@ (__v16qi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi8 (__mmask16 __M, char __A) { return (__m128i) __builtin_ia32_selectb_128(__M, @@ -2280,7 +2281,7 @@ (__v16qi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A) { return (__m256i) __builtin_ia32_selectb_256(__M, @@ -2288,7 +2289,7 @@ (__v32qi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi8 (__mmask32 __M, char __A) { return (__m256i) __builtin_ia32_selectb_256(__M, @@ -2296,7 +2297,7 @@ (__v32qi) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P, @@ -2304,7 +2305,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P, @@ -2313,7 +2314,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P, @@ -2321,7 +2322,7 @@ (__mmask16) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P, @@ -2330,7 +2331,7 @@ (__mmask16) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P, @@ -2338,7 +2339,7 @@ (__mmask16) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P, @@ -2347,7 +2348,7 @@ (__mmask16) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P, @@ -2355,7 +2356,7 @@ (__mmask32) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P, @@ -2363,7 +2364,7 @@ _mm256_setzero_si256 (), (__mmask32) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedquhi128_mask ((__v8hi *) __P, @@ -2371,7 +2372,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A) { __builtin_ia32_storedquhi256_mask ((__v16hi *) __P, @@ -2379,7 +2380,7 @@ (__mmask16) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A) { __builtin_ia32_storedquqi128_mask ((__v16qi *) __P, @@ -2387,7 +2388,7 @@ (__mmask16) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) { __builtin_ia32_storedquqi256_mask ((__v32qi *) __P, @@ -2395,162 +2396,162 @@ (__mmask32) __U); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_test_epi8_mask (__m128i __A, __m128i __B) { return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_test_epi8_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi16_mask (__m128i __A, __m128i __B) { return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_test_epi16_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256 ()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_testn_epi8_mask (__m128i __A, __m128i __B) { return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_testn_epi8_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi16_mask (__m128i __A, __m128i __B) { return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_testn_epi16_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_movepi8_mask (__m128i __A) { return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_movepi8_mask (__m256i __A) { return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi16_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_movepi16_mask (__m256i __A) { return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi8 (__mmask16 __A) { return (__m128i) __builtin_ia32_cvtmask2b128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi8 (__mmask32 __A) { return (__m256i) __builtin_ia32_cvtmask2b256 (__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi16 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2w128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi16 (__mmask16 __A) { return (__m256i) __builtin_ia32_cvtmask2w256 (__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128(__M, @@ -2558,7 +2559,7 @@ (__v16qi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128(__M, @@ -2566,7 +2567,7 @@ (__v16qi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectb_256(__M, @@ -2574,7 +2575,7 @@ (__v32qi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectb_256(__M, @@ -2582,7 +2583,7 @@ (__v32qi) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128(__M, @@ -2590,7 +2591,7 @@ (__v8hi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128(__M, @@ -2598,7 +2599,7 @@ (__v8hi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256(__M, @@ -2606,7 +2607,7 @@ (__v16hi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256(__M, @@ -2614,7 +2615,7 @@ (__v16hi) _mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A) { return (__m256i) __builtin_ia32_selectw_256 (__M, @@ -2622,7 +2623,7 @@ (__v16hi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi16 (__mmask16 __M, short __A) { return (__m256i) __builtin_ia32_selectw_256(__M, @@ -2630,7 +2631,7 @@ (__v16hi) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A) { return (__m128i) __builtin_ia32_selectw_128(__M, @@ -2638,7 +2639,7 @@ (__v8hi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi16 (__mmask8 __M, short __A) { return (__m128i) __builtin_ia32_selectw_128(__M, @@ -2646,13 +2647,13 @@ (__v8hi) _mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -2660,7 +2661,7 @@ (__v8hi) _mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { @@ -2669,13 +2670,13 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A, __m256i __B) { @@ -2684,7 +2685,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { @@ -2749,6 +2750,7 @@ (__v16hi)_mm256_setzero_si256(), \ (__mmask16)(U)) -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __AVX512VLBWINTRIN_H */ Index: lib/Headers/avx512vlcdintrin.h =================================================================== --- lib/Headers/avx512vlcdintrin.h +++ lib/Headers/avx512vlcdintrin.h @@ -28,35 +28,36 @@ #define __AVX512VLCDINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256))) -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastmb_epi64 (__mmask8 __A) { return (__m128i) _mm_set1_epi64x((long long) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastmb_epi64 (__mmask8 __A) { return (__m256i) _mm256_set1_epi64x((long long)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastmw_epi32 (__mmask16 __A) { return (__m128i) _mm_set1_epi32((int)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastmw_epi32 (__mmask16 __A) { return (__m256i) _mm256_set1_epi32((int)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_conflict_epi64 (__m128i __A) { return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, @@ -64,7 +65,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, @@ -72,7 +73,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, @@ -81,7 +82,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_conflict_epi64 (__m256i __A) { return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, @@ -89,7 +90,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, @@ -97,7 +98,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, @@ -105,7 +106,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_conflict_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, @@ -113,7 +114,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, @@ -121,7 +122,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, @@ -129,7 +130,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_conflict_epi32 (__m256i __A) { return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, @@ -137,7 +138,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, @@ -145,7 +146,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, @@ -154,13 +155,13 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -168,7 +169,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -176,13 +177,13 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi32 (__m256i __A) { return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -190,7 +191,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -198,13 +199,13 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi64 (__m128i __A) { return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -212,7 +213,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -220,13 +221,13 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi64 (__m256i __A) { return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -234,7 +235,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -242,6 +243,7 @@ (__v4di)_mm256_setzero_si256()); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __AVX512VLCDINTRIN_H */ Index: lib/Headers/avx512vldqintrin.h =================================================================== --- lib/Headers/avx512vldqintrin.h +++ lib/Headers/avx512vldqintrin.h @@ -29,760 +29,761 @@ #define __AVX512VLDQINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256))) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi64 (__m256i __A, __m256i __B) { return (__m256i) ((__v4du) __A * (__v4du) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mullo_epi64 (__m128i __A, __m128i __B) { return (__m128i) ((__v2du) __A * (__v2du) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epi64 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epi64 (__m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu64 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu64 (__m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epi64 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epi64 (__m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu64 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu64 (__m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepi64_pd (__m128i __A) { return (__m128d)__builtin_convertvector((__v2di)__A, __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_pd (__m256i __A) { return (__m256d)__builtin_convertvector((__v4di)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ps (__m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_ps (__m256i __A) { return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epi64 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epi64 (__m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu64 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu64 (__m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epi64 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epi64 (__m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu64 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu64 (__m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu64_pd (__m128i __A) { return (__m128d)__builtin_convertvector((__v2du)__A, __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_pd (__m256i __A) { return (__m256d)__builtin_convertvector((__v4du)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ps (__m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_ps (__m256i __A) { return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, (__v4sf) _mm_setzero_ps(), @@ -919,62 +920,62 @@ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U)) -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi32_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi32_mask (__m256i __A) { return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi32 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2d128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi32 (__mmask8 __A) { return (__m256i) __builtin_ia32_cvtmask2d256 (__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi64 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2q128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi64 (__mmask8 __A) { return (__m256i) __builtin_ia32_cvtmask2q256 (__A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi64_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi64_mask (__m256i __A) { return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x2 (__m128 __A) { return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, @@ -982,7 +983,7 @@ (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, @@ -990,14 +991,14 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcast_f64x2(__m128d __A) { return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 0, 1, 0, 1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, @@ -1005,7 +1006,7 @@ (__v4df)__O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, @@ -1013,14 +1014,14 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcast_i32x2 (__m128i __A) { return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -1028,7 +1029,7 @@ (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -1036,14 +1037,14 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x2 (__m128i __A) { return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -1051,7 +1052,7 @@ (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -1059,14 +1060,14 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i64x2(__m128i __A) { return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -1074,7 +1075,7 @@ (__v4di)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -1182,6 +1183,7 @@ (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__mmask8)-1) -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avx512vlintrin.h =================================================================== --- lib/Headers/avx512vlintrin.h +++ lib/Headers/avx512vlintrin.h @@ -28,7 +28,8 @@ #ifndef __AVX512VLINTRIN_H #define __AVX512VLINTRIN_H -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256))) /* Integer compare */ @@ -232,7 +233,7 @@ #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \ _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -240,7 +241,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -248,7 +249,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -256,7 +257,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -264,7 +265,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -272,7 +273,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -280,7 +281,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -288,7 +289,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -296,7 +297,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -304,7 +305,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -312,7 +313,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -320,7 +321,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -328,7 +329,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -336,7 +337,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -344,7 +345,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -352,7 +353,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -360,7 +361,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -368,7 +369,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -376,7 +377,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, @@ -384,7 +385,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, @@ -392,7 +393,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -400,7 +401,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -408,7 +409,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, @@ -416,7 +417,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, @@ -424,7 +425,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -432,7 +433,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -440,7 +441,7 @@ (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -448,7 +449,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -456,7 +457,7 @@ (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -464,13 +465,13 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -478,13 +479,13 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -492,14 +493,14 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -507,13 +508,13 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -521,13 +522,13 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -535,13 +536,13 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -549,13 +550,13 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -564,13 +565,13 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -578,13 +579,13 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -592,13 +593,13 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -606,14 +607,14 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -621,13 +622,13 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -635,13 +636,13 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -649,13 +650,13 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -663,13 +664,13 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -678,7 +679,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); @@ -804,7 +805,7 @@ (__v2df)(__m128d)(b), (int)(p), \ (__mmask8)(m)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -814,7 +815,7 @@ (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -824,7 +825,7 @@ (__v2df) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -834,7 +835,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -844,7 +845,7 @@ (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -854,7 +855,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -864,7 +865,7 @@ (__v2df) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -874,7 +875,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -884,7 +885,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -894,7 +895,7 @@ (__v4df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -904,7 +905,7 @@ (__v4df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -914,7 +915,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -924,7 +925,7 @@ (__v4df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -934,7 +935,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -944,7 +945,7 @@ (__v4df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -954,7 +955,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -964,7 +965,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -974,7 +975,7 @@ (__v4sf) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -984,7 +985,7 @@ (__v4sf) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -994,7 +995,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1004,7 +1005,7 @@ (__v4sf) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1014,7 +1015,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1024,7 +1025,7 @@ (__v4sf) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1034,7 +1035,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1044,7 +1045,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1054,7 +1055,7 @@ (__v8sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1064,7 +1065,7 @@ (__v8sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1074,7 +1075,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1084,7 +1085,7 @@ (__v8sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1094,7 +1095,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1104,7 +1105,7 @@ (__v8sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1114,7 +1115,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1124,7 +1125,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1134,7 +1135,7 @@ (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1144,7 +1145,7 @@ (__v2df) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1154,7 +1155,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1164,7 +1165,7 @@ (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1174,7 +1175,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1184,7 +1185,7 @@ (__v4df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1194,7 +1195,7 @@ (__v4df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1204,7 +1205,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1214,7 +1215,7 @@ (__v4df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1224,7 +1225,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1234,7 +1235,7 @@ (__v4sf) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1244,7 +1245,7 @@ (__v4sf) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1254,7 +1255,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1264,7 +1265,7 @@ (__v4sf) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1274,7 +1275,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { @@ -1285,7 +1286,7 @@ (__v8sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1295,7 +1296,7 @@ (__v8sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1305,7 +1306,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1315,7 +1316,7 @@ (__v8sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1325,7 +1326,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1335,7 +1336,7 @@ (__v2df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1345,7 +1346,7 @@ (__v4df) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1355,7 +1356,7 @@ (__v4sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1365,7 +1366,7 @@ (__v8sf) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1375,7 +1376,7 @@ (__v2df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1385,7 +1386,7 @@ (__v4df) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1395,7 +1396,7 @@ (__v4sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1405,7 +1406,7 @@ (__v8sf) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1415,7 +1416,7 @@ (__v2df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1425,7 +1426,7 @@ (__v4df) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1435,7 +1436,7 @@ (__v4sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1445,7 +1446,7 @@ (__v8sf) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1455,7 +1456,7 @@ (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1465,7 +1466,7 @@ (__v2df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1475,7 +1476,7 @@ (__v4df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1485,7 +1486,7 @@ (__v4df) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1495,7 +1496,7 @@ (__v4sf) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1505,7 +1506,7 @@ (__v4sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1515,7 +1516,7 @@ (__v8sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1525,126 +1526,126 @@ (__v8sf) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_add_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_add_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_add_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_add_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_add_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_add_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_add_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_add_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, (__v4si) __W, (__v4si) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, (__v8si) __W, (__v8si) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, (__v2df) __W, (__v2df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, (__v4df) __W, (__v4df) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, (__v4sf) __W, (__v4sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, (__v8sf) __W, (__v8sf) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, (__v2di) __W, (__v2di) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, (__v4di) __W, (__v4di) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, (__v2df) __W, (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, (__v2df) @@ -1652,14 +1653,14 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, (__v4df) __W, (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, (__v4df) @@ -1667,14 +1668,14 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, (__v2di) @@ -1682,14 +1683,14 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, (__v4di) @@ -1697,14 +1698,14 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, (__v4sf) @@ -1712,14 +1713,14 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, (__v8sf) @@ -1727,14 +1728,14 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, (__v4si) @@ -1742,14 +1743,14 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, (__v8si) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, (__v8si) @@ -1757,126 +1758,126 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) { __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, (__v2df) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) { __builtin_ia32_compressstoredf256_mask ((__v4df *) __P, (__v4df) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, (__v2di) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, (__v4di) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) { __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, (__v4sf) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) { __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P, (__v8sf) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_compressstoresi128_mask ((__v4si *) __P, (__v4si) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, (__v8si) __A, (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepi32_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepi32_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepi32_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepi32_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepi32_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_ps (__mmask16 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepi32_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepi32_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_ps (__mmask16 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepi32_ps(__A), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, (__v4si) @@ -1884,28 +1885,28 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm256_cvtpd_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm256_cvtpd_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, (__v4sf) @@ -1913,21 +1914,21 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtpd_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtpd_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu32 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, (__v4si) @@ -1935,14 +1936,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, (__v4si) @@ -1950,7 +1951,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu32 (__m256d __A) { return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, (__v4si) @@ -1958,14 +1959,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) { return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, (__v4si) @@ -1973,63 +1974,63 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtps_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtps_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtps_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtps_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtps_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtps_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtps_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtps_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu32 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, (__v4si) @@ -2037,14 +2038,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, (__v4si) @@ -2052,7 +2053,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu32 (__m256 __A) { return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, (__v8si) @@ -2060,14 +2061,14 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, (__v8si) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) { return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, (__v8si) @@ -2075,14 +2076,14 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, (__v4si) @@ -2090,21 +2091,21 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm256_cvttpd_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm256_cvttpd_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu32 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, (__v4si) @@ -2112,14 +2113,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, (__v4si) @@ -2127,7 +2128,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu32 (__m256d __A) { return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, (__v4si) @@ -2135,14 +2136,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) { return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, (__v4si) @@ -2150,35 +2151,35 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvttps_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvttps_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvttps_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvttps_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu32 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, (__v4si) @@ -2186,14 +2187,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, (__v4si) @@ -2201,7 +2202,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu32 (__m256 __A) { return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, (__v8si) @@ -2209,14 +2210,14 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, (__v8si) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, (__v8si) @@ -2224,147 +2225,147 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu32_pd (__m128i __A) { return (__m128d) __builtin_convertvector( __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepu32_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepu32_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_pd (__m128i __A) { return (__m256d)__builtin_convertvector((__v4su)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepu32_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepu32_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ps (__m128i __A) { return (__m128)__builtin_convertvector((__v4su)__A, __v4sf); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepu32_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepu32_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_ps (__m256i __A) { return (__m256)__builtin_convertvector((__v8su)__A, __v8sf); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepu32_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepu32_ps(__A), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_div_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_div_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_div_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_div_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_div_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_div_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_div_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_div_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, (__v2df) __W, (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, (__v2df) @@ -2372,14 +2373,14 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, (__v4df) __W, (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, (__v4df) @@ -2387,14 +2388,14 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, (__v2di) @@ -2402,14 +2403,14 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, (__v4di) @@ -2417,7 +2418,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, (__v2df) __W, @@ -2425,7 +2426,7 @@ __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, (__v2df) @@ -2434,7 +2435,7 @@ __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, (__v4df) __W, @@ -2442,7 +2443,7 @@ __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, (__v4df) @@ -2451,7 +2452,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, (__v2di) __W, @@ -2459,7 +2460,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, (__v2di) @@ -2468,7 +2469,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, @@ -2477,7 +2478,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, (__v4di) @@ -2486,14 +2487,14 @@ __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, (__v4sf) @@ -2502,14 +2503,14 @@ __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, (__v8sf) __W, (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, (__v8sf) @@ -2518,7 +2519,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, (__v4si) __W, @@ -2526,7 +2527,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, (__v4si) @@ -2534,7 +2535,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, @@ -2543,7 +2544,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, (__v8si) @@ -2552,14 +2553,14 @@ __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, (__v4sf) @@ -2567,14 +2568,14 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, (__v8sf) @@ -2582,14 +2583,14 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, (__v4si) @@ -2597,14 +2598,14 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, (__v8si) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, (__v8si) @@ -2612,7 +2613,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_pd (__m128d __A) { return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, (__v2df) @@ -2620,14 +2621,14 @@ (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, (__v2df) __W, (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, (__v2df) @@ -2635,7 +2636,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_getexp_pd (__m256d __A) { return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, (__v4df) @@ -2643,14 +2644,14 @@ (__mmask8) -1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, (__v4df) __W, (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, (__v4df) @@ -2658,7 +2659,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ps (__m128 __A) { return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, (__v4sf) @@ -2666,14 +2667,14 @@ (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, (__v4sf) @@ -2681,7 +2682,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_getexp_ps (__m256 __A) { return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, (__v8sf) @@ -2689,14 +2690,14 @@ (__mmask8) -1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, (__v8sf) @@ -2704,498 +2705,498 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_max_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_max_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_max_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_max_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_max_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_max_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_max_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_max_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_min_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_min_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_min_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_min_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_min_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_min_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_min_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_min_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_mul_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_mul_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_mul_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_mul_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_mul_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_mul_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_mul_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_mul_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_abs_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_abs_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U, (__v8si)_mm256_abs_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask16)__U, (__v8si)_mm256_abs_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_abs_epi64 (__m128i __A) { return (__m128i)__builtin_ia32_pabsq128((__v2di)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_abs_epi64(__A), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_abs_epi64(__A), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi64 (__m256i __A) { return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_abs_epi64(__A), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_abs_epi64(__A), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epu32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epu32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epu32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epu32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epu64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epu64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epu64(__A, __B), (__v2di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epu64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epu64(__A, __B), (__v4di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epu32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epu32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epu32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epu32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epu64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epu64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epu64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epu64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epu64(__A, __B), @@ -3276,7 +3277,7 @@ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_pd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, (__v2df) __B, @@ -3285,7 +3286,7 @@ (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, @@ -3294,7 +3295,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, (__v2df) __B, @@ -3303,7 +3304,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_scalef_pd (__m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, (__v4df) __B, @@ -3312,7 +3313,7 @@ (__mmask8) -1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, @@ -3321,7 +3322,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, (__v4df) __B, @@ -3330,7 +3331,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, (__v4sf) __B, @@ -3339,7 +3340,7 @@ (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, (__v4sf) __B, @@ -3347,7 +3348,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, (__v4sf) __B, @@ -3356,7 +3357,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_scalef_ps (__m256 __A, __m256 __B) { return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, (__v8sf) __B, @@ -3365,7 +3366,7 @@ (__mmask8) -1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, @@ -3374,7 +3375,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, (__v8sf) __B, @@ -3543,125 +3544,125 @@ (__v8si)(__m256i)(index), \ (__v8si)(__m256i)(v1), (int)(scale)) - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)__W); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)_mm_setzero_pd()); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)__W); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)_mm256_setzero_pd()); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)__W); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)_mm_setzero_pd()); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), (__v8sf)__W); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), (__v8sf)_mm256_setzero_ps()); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sub_pd(__A, __B), (__v2df)__W); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sub_pd(__A, __B), (__v2df)_mm_setzero_pd()); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sub_pd(__A, __B), (__v4df)__W); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sub_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sub_ps(__A, __B), (__v4sf)__W); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sub_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sub_ps(__A, __B), (__v8sf)__W); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sub_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I, (__v4si)__B); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -3669,7 +3670,7 @@ (__v4si)__A); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -3677,7 +3678,7 @@ (__v4si)__I); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -3685,13 +3686,13 @@ (__v4si)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I, (__v8si) __B); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -3699,7 +3700,7 @@ (__v8si)__A); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -3707,7 +3708,7 @@ (__v8si)__I); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -3715,40 +3716,40 @@ (__v8si)_mm256_setzero_si256()); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) { return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I, (__v2df)__B); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128(__U, (__v2df)_mm_permutex2var_pd(__A, __I, __B), (__v2df)__A); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128(__U, (__v2df)_mm_permutex2var_pd(__A, __I, __B), (__v2df)(__m128d)__I); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128(__U, (__v2df)_mm_permutex2var_pd(__A, __I, __B), (__v2df)_mm_setzero_pd()); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) { return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I, (__v4df)__B); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256(__U, @@ -3756,7 +3757,7 @@ (__v4df)__A); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256(__U, @@ -3764,7 +3765,7 @@ (__v4df)(__m256d)__I); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256(__U, @@ -3772,47 +3773,47 @@ (__v4df)_mm256_setzero_pd()); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) { return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I, (__v4sf)__B); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) { return (__m128)__builtin_ia32_selectps_128(__U, (__v4sf)_mm_permutex2var_ps(__A, __I, __B), (__v4sf)__A); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) { return (__m128)__builtin_ia32_selectps_128(__U, (__v4sf)_mm_permutex2var_ps(__A, __I, __B), (__v4sf)(__m128)__I); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) { return (__m128)__builtin_ia32_selectps_128(__U, (__v4sf)_mm_permutex2var_ps(__A, __I, __B), (__v4sf)_mm_setzero_ps()); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) { return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I, (__v8sf) __B); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) { return (__m256)__builtin_ia32_selectps_256(__U, (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), (__v8sf)__A); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B) { return (__m256)__builtin_ia32_selectps_256(__U, @@ -3820,7 +3821,7 @@ (__v8sf)(__m256)__I); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B) { return (__m256)__builtin_ia32_selectps_256(__U, @@ -3828,13 +3829,13 @@ (__v8sf)_mm256_setzero_ps()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I, (__v2di)__B); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -3842,7 +3843,7 @@ (__v2di)__A); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -3850,7 +3851,7 @@ (__v2di)__I); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -3859,13 +3860,13 @@ } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I, (__v4di) __B); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -3873,7 +3874,7 @@ (__v4di)__A); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -3881,7 +3882,7 @@ (__v4di)__I); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -3889,7 +3890,7 @@ (__v4di)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -3897,7 +3898,7 @@ (__v4si)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -3905,7 +3906,7 @@ (__v4si)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -3913,7 +3914,7 @@ (__v8si)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -3921,7 +3922,7 @@ (__v8si)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -3929,7 +3930,7 @@ (__v2di)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -3937,7 +3938,7 @@ (__v2di)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -3945,7 +3946,7 @@ (__v4di)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -3953,7 +3954,7 @@ (__v4di)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -3961,7 +3962,7 @@ (__v2di)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -3969,7 +3970,7 @@ (__v2di)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -3977,7 +3978,7 @@ (__v4di)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -3985,7 +3986,7 @@ (__v4di)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -3993,7 +3994,7 @@ (__v4si)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4001,7 +4002,7 @@ (__v4si)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4009,7 +4010,7 @@ (__v8si)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4017,7 +4018,7 @@ (__v8si)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4025,7 +4026,7 @@ (__v2di)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4033,7 +4034,7 @@ (__v2di)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4041,7 +4042,7 @@ (__v4di)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4050,7 +4051,7 @@ } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4058,7 +4059,7 @@ (__v4si)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4066,7 +4067,7 @@ (__v4si)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4074,7 +4075,7 @@ (__v8si)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4082,7 +4083,7 @@ (__v8si)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4090,7 +4091,7 @@ (__v2di)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4098,7 +4099,7 @@ (__v2di)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4106,7 +4107,7 @@ (__v4di)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4114,7 +4115,7 @@ (__v4di)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4122,7 +4123,7 @@ (__v2di)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4130,7 +4131,7 @@ (__v2di)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4138,7 +4139,7 @@ (__v4di)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4146,7 +4147,7 @@ (__v4di)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4154,7 +4155,7 @@ (__v4si)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4162,7 +4163,7 @@ (__v4si)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4170,7 +4171,7 @@ (__v8si)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4178,7 +4179,7 @@ (__v8si)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4186,7 +4187,7 @@ (__v2di)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4194,7 +4195,7 @@ (__v2di)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4202,7 +4203,7 @@ (__v4di)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4267,7 +4268,7 @@ (__v4di)_mm256_setzero_si256(), \ (__mmask8)(u)) -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi32 (__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, @@ -4277,7 +4278,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -4287,7 +4288,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_prolvd128_mask ((__v4si) __A, @@ -4297,7 +4298,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi32 (__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, @@ -4307,7 +4308,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -4317,7 +4318,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_prolvd256_mask ((__v8si) __A, @@ -4327,7 +4328,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi64 (__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, @@ -4337,7 +4338,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -4347,7 +4348,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_prolvq128_mask ((__v2di) __A, @@ -4357,7 +4358,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi64 (__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, @@ -4367,7 +4368,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -4377,7 +4378,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_prolvq256_mask ((__v4di) __A, @@ -4443,7 +4444,7 @@ (__v4di)_mm256_setzero_si256(), \ (__mmask8)(U)) -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4451,7 +4452,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4459,7 +4460,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4467,7 +4468,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4475,7 +4476,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4483,7 +4484,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4491,7 +4492,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4499,7 +4500,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4507,7 +4508,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4515,7 +4516,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4523,7 +4524,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4531,7 +4532,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4539,7 +4540,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4547,7 +4548,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4555,7 +4556,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4563,7 +4564,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4571,7 +4572,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi32 (__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, @@ -4581,7 +4582,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -4591,7 +4592,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_prorvd128_mask ((__v4si) __A, @@ -4601,7 +4602,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi32 (__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, @@ -4611,7 +4612,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -4621,7 +4622,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_prorvd256_mask ((__v8si) __A, @@ -4631,7 +4632,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi64 (__m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, @@ -4641,7 +4642,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -4651,7 +4652,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_prorvq128_mask ((__v2di) __A, @@ -4661,7 +4662,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi64 (__m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, @@ -4671,7 +4672,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { @@ -4681,7 +4682,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_prorvq256_mask ((__v4di) __A, @@ -4691,7 +4692,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4699,7 +4700,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4707,7 +4708,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4715,7 +4716,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4723,7 +4724,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4731,7 +4732,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4739,7 +4740,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4747,7 +4748,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4755,7 +4756,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4763,7 +4764,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4771,7 +4772,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4779,7 +4780,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4787,7 +4788,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4795,7 +4796,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4803,7 +4804,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4811,7 +4812,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4819,7 +4820,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4827,7 +4828,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4835,7 +4836,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4843,7 +4844,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4851,7 +4852,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4859,7 +4860,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4867,7 +4868,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4875,7 +4876,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4883,7 +4884,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4891,7 +4892,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4899,7 +4900,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4907,7 +4908,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4915,7 +4916,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4923,7 +4924,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4931,7 +4932,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4939,7 +4940,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4947,7 +4948,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4955,7 +4956,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4963,7 +4964,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4971,7 +4972,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4979,13 +4980,13 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4993,7 +4994,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -5001,13 +5002,13 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -5015,7 +5016,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -5023,7 +5024,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, @@ -5031,7 +5032,7 @@ (__v4si) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, @@ -5040,7 +5041,7 @@ } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, @@ -5048,7 +5049,7 @@ (__v8si) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, @@ -5056,7 +5057,7 @@ (__v8si) _mm256_setzero_si256 ()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, @@ -5065,7 +5066,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, @@ -5075,7 +5076,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, @@ -5084,7 +5085,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, @@ -5094,7 +5095,7 @@ __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, @@ -5102,7 +5103,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, @@ -5110,7 +5111,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, @@ -5118,7 +5119,7 @@ (__v2di) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, @@ -5126,7 +5127,7 @@ (__v2di) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, @@ -5134,7 +5135,7 @@ (__v4di) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, @@ -5142,7 +5143,7 @@ (__v4di) _mm256_setzero_si256 ()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, @@ -5151,7 +5152,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, @@ -5161,7 +5162,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, @@ -5170,7 +5171,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, @@ -5180,7 +5181,7 @@ __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, @@ -5188,7 +5189,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, @@ -5196,7 +5197,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5204,7 +5205,7 @@ (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5212,7 +5213,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5220,7 +5221,7 @@ (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5228,7 +5229,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) { return (__m128i)__builtin_ia32_selectd_128(__M, @@ -5236,7 +5237,7 @@ (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi32( __mmask8 __M, int __A) { return (__m128i)__builtin_ia32_selectd_128(__M, @@ -5244,7 +5245,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) { return (__m256i)__builtin_ia32_selectd_256(__M, @@ -5252,7 +5253,7 @@ (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi32( __mmask8 __M, int __A) { return (__m256i)__builtin_ia32_selectd_256(__M, @@ -5261,7 +5262,7 @@ } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, @@ -5269,7 +5270,7 @@ (__v2di) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, @@ -5277,7 +5278,7 @@ (__v2di) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) { return (__m256i) __builtin_ia32_selectq_256(__M, @@ -5285,7 +5286,7 @@ (__v4di) __O) ; } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) { return (__m256i) __builtin_ia32_selectq_256(__M, @@ -5365,7 +5366,7 @@ (__v8si)(__m256i)(C), (int)(imm), \ (__mmask8)(U)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, @@ -5373,7 +5374,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_pd (__mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, @@ -5382,7 +5383,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, @@ -5390,7 +5391,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_load_pd (__mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, @@ -5399,7 +5400,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, @@ -5407,7 +5408,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ps (__mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, @@ -5416,7 +5417,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, @@ -5424,7 +5425,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_load_ps (__mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, @@ -5433,7 +5434,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, @@ -5441,7 +5442,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, @@ -5450,7 +5451,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, @@ -5458,7 +5459,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, @@ -5467,7 +5468,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, @@ -5475,7 +5476,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, @@ -5484,7 +5485,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, @@ -5492,7 +5493,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, @@ -5501,7 +5502,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, @@ -5509,7 +5510,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, @@ -5518,7 +5519,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, @@ -5526,7 +5527,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, @@ -5535,7 +5536,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, @@ -5543,7 +5544,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, @@ -5552,7 +5553,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, @@ -5560,7 +5561,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, @@ -5569,7 +5570,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) { __builtin_ia32_storeapd128_mask ((__v2df *) __P, @@ -5577,7 +5578,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) { __builtin_ia32_storeapd256_mask ((__v4df *) __P, @@ -5585,7 +5586,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) { __builtin_ia32_storeaps128_mask ((__v4sf *) __P, @@ -5593,7 +5594,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) { __builtin_ia32_storeaps256_mask ((__v8sf *) __P, @@ -5601,7 +5602,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedqudi128_mask ((__v2di *) __P, @@ -5609,7 +5610,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_storedqudi256_mask ((__v4di *) __P, @@ -5617,7 +5618,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedqusi128_mask ((__v4si *) __P, @@ -5625,7 +5626,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_storedqusi256_mask ((__v8si *) __P, @@ -5633,7 +5634,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) { __builtin_ia32_storeupd128_mask ((__v2df *) __P, @@ -5641,7 +5642,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) { __builtin_ia32_storeupd256_mask ((__v4df *) __P, @@ -5649,7 +5650,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) { __builtin_ia32_storeups128_mask ((__v4sf *) __P, @@ -5657,7 +5658,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) { __builtin_ia32_storeups256_mask ((__v8sf *) __P, @@ -5666,7 +5667,7 @@ } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5674,7 +5675,7 @@ (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5682,7 +5683,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5690,7 +5691,7 @@ (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5698,7 +5699,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5706,7 +5707,7 @@ (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5714,7 +5715,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5722,7 +5723,7 @@ (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5730,7 +5731,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5738,7 +5739,7 @@ (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5746,7 +5747,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5754,7 +5755,7 @@ (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5762,7 +5763,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5770,7 +5771,7 @@ (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5778,7 +5779,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5786,7 +5787,7 @@ (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5794,7 +5795,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_pd (__m128d __A) { return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, @@ -5803,7 +5804,7 @@ (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, @@ -5811,7 +5812,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, @@ -5820,7 +5821,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rcp14_pd (__m256d __A) { return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, @@ -5829,7 +5830,7 @@ (__mmask8) -1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, @@ -5837,7 +5838,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, @@ -5846,7 +5847,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ps (__m128 __A) { return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, @@ -5855,7 +5856,7 @@ (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, @@ -5863,7 +5864,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, @@ -5872,7 +5873,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rcp14_ps (__m256 __A) { return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, @@ -5881,7 +5882,7 @@ (__mmask8) -1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, @@ -5889,7 +5890,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, @@ -5938,7 +5939,7 @@ (__v8sf)_mm256_permute_ps((X), (C)), \ (__v8sf)_mm256_setzero_ps()) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5946,7 +5947,7 @@ (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5954,7 +5955,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5962,7 +5963,7 @@ (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5970,7 +5971,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5978,7 +5979,7 @@ (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5986,7 +5987,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5994,7 +5995,7 @@ (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -6002,115 +6003,115 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi32_mask (__m128i __A, __m128i __B) { return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi32_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi64_mask (__m128i __A, __m128i __B) { return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi64_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi32_mask (__m128i __A, __m128i __B) { return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi32_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi64_mask (__m128i __A, __m128i __B) { return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi64_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6118,7 +6119,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6126,7 +6127,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6134,7 +6135,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6142,7 +6143,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -6150,7 +6151,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -6158,7 +6159,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -6166,7 +6167,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -6174,7 +6175,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6182,7 +6183,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6190,7 +6191,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6198,7 +6199,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6206,7 +6207,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -6214,7 +6215,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -6222,7 +6223,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -6230,7 +6231,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -6238,7 +6239,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6246,7 +6247,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6254,7 +6255,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6262,7 +6263,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6270,7 +6271,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6278,7 +6279,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6286,7 +6287,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6294,7 +6295,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6302,13 +6303,13 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sra_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ @@ -6316,7 +6317,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ @@ -6324,13 +6325,13 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi64(__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ @@ -6338,7 +6339,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ @@ -6346,13 +6347,13 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srai_epi64(__m128i __A, int __imm) { return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ @@ -6360,7 +6361,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ @@ -6368,13 +6369,13 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi64(__m256i __A, int __imm) { return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ @@ -6382,7 +6383,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ @@ -6581,7 +6582,7 @@ (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ (__v8sf)_mm256_setzero_ps()) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_pd (__m128d __A) { return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, @@ -6590,7 +6591,7 @@ (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, @@ -6598,7 +6599,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, @@ -6607,7 +6608,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_pd (__m256d __A) { return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, @@ -6616,7 +6617,7 @@ (__mmask8) -1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, @@ -6624,7 +6625,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, @@ -6633,7 +6634,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ps (__m128 __A) { return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, @@ -6642,7 +6643,7 @@ (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, @@ -6650,7 +6651,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, @@ -6659,7 +6660,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_ps (__m256 __A) { return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, @@ -6668,7 +6669,7 @@ (__mmask8) -1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, @@ -6676,7 +6677,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, @@ -6685,14 +6686,14 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x4(__m128 __A) { return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, @@ -6700,7 +6701,7 @@ (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, @@ -6708,14 +6709,14 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x4(__m128i __A) { return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -6723,7 +6724,7 @@ (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -6731,7 +6732,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, @@ -6739,7 +6740,7 @@ (__v4df) __O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, @@ -6747,7 +6748,7 @@ (__v4df) _mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, @@ -6755,7 +6756,7 @@ (__v4sf) __O); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, @@ -6763,7 +6764,7 @@ (__v4sf) _mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, @@ -6771,7 +6772,7 @@ (__v8sf) __O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, @@ -6779,7 +6780,7 @@ (__v8sf) _mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, @@ -6787,7 +6788,7 @@ (__v4si) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, @@ -6795,7 +6796,7 @@ (__v4si) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, @@ -6803,7 +6804,7 @@ (__v8si) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, @@ -6811,7 +6812,7 @@ (__v8si) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -6819,7 +6820,7 @@ (__v2di) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -6827,7 +6828,7 @@ (__v2di) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -6835,7 +6836,7 @@ (__v4di) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -6843,7 +6844,7 @@ (__v4di) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, @@ -6851,14 +6852,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, @@ -6866,13 +6867,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, @@ -6880,14 +6881,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, @@ -6895,13 +6896,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, @@ -6909,7 +6910,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, @@ -6917,7 +6918,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, @@ -6925,13 +6926,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, @@ -6939,14 +6940,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, @@ -6954,13 +6955,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, @@ -6968,14 +6969,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, @@ -6983,13 +6984,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, @@ -6997,14 +6998,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, @@ -7012,13 +7013,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, @@ -7026,14 +7027,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, (__v4si) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, @@ -7041,13 +7042,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi32 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, @@ -7055,7 +7056,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, @@ -7063,7 +7064,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, @@ -7071,13 +7072,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, @@ -7085,14 +7086,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, @@ -7100,13 +7101,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, @@ -7114,14 +7115,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, @@ -7129,13 +7130,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, @@ -7143,7 +7144,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, @@ -7151,7 +7152,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, @@ -7159,13 +7160,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, @@ -7173,7 +7174,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, @@ -7181,7 +7182,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, @@ -7189,13 +7190,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, @@ -7203,14 +7204,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, @@ -7218,13 +7219,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, @@ -7232,14 +7233,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, @@ -7247,13 +7248,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, @@ -7261,7 +7262,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, @@ -7269,7 +7270,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, @@ -7277,13 +7278,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, @@ -7291,7 +7292,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, @@ -7299,7 +7300,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, @@ -7307,13 +7308,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, @@ -7321,14 +7322,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, (__v4si) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, @@ -7336,13 +7337,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi32 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, @@ -7350,14 +7351,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, (__v4si) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, @@ -7365,13 +7366,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, @@ -7379,14 +7380,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, @@ -7394,13 +7395,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, @@ -7408,14 +7409,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, @@ -7423,13 +7424,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, @@ -7437,14 +7438,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, @@ -7453,13 +7454,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, @@ -7467,14 +7468,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, @@ -7482,13 +7483,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, @@ -7496,14 +7497,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, @@ -7511,26 +7512,26 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi16 (__m256i __A) { return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, @@ -7538,13 +7539,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, @@ -7552,14 +7553,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, @@ -7567,13 +7568,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, @@ -7581,14 +7582,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, @@ -7596,13 +7597,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, @@ -7610,14 +7611,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, (__v4si) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, @@ -7625,19 +7626,19 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi32 (__m256i __A) { return (__m128i)__builtin_convertvector((__v4di)__A, __v4si); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -7645,7 +7646,7 @@ (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -7653,13 +7654,13 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm_cvtepi64_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, @@ -7667,7 +7668,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, @@ -7675,7 +7676,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, @@ -7683,13 +7684,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, @@ -7697,14 +7698,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, @@ -7712,7 +7713,7 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); @@ -7998,13 +7999,13 @@ (__v4di)_mm256_permutex_epi64((X), (C)), \ (__v4di)_mm256_setzero_si256()) -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutexvar_pd (__m256i __X, __m256d __Y) { return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y) { @@ -8013,7 +8014,7 @@ (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -8021,13 +8022,13 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -8035,7 +8036,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { @@ -8046,7 +8047,7 @@ #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8054,7 +8055,7 @@ (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8064,7 +8065,7 @@ #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { @@ -8073,7 +8074,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -8155,7 +8156,7 @@ (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ (__v4di)_mm256_setzero_si256()) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -8163,7 +8164,7 @@ (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -8171,7 +8172,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8179,7 +8180,7 @@ (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8187,7 +8188,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -8195,7 +8196,7 @@ (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -8203,7 +8204,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8211,7 +8212,7 @@ (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8239,7 +8240,7 @@ (__v4si)_mm_shuffle_epi32((A), (I)), \ (__v4si)_mm_setzero_si128()) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, @@ -8247,7 +8248,7 @@ (__v2df) __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, @@ -8255,7 +8256,7 @@ (__v2df) _mm_setzero_pd ()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, @@ -8263,7 +8264,7 @@ (__v4df) __W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, @@ -8271,7 +8272,7 @@ (__v4df) _mm256_setzero_pd ()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, @@ -8279,7 +8280,7 @@ (__v4sf) __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, @@ -8287,7 +8288,7 @@ (__v4sf) _mm_setzero_ps ()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, @@ -8295,7 +8296,7 @@ (__v8sf) __W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, @@ -8303,7 +8304,7 @@ (__v8sf) _mm256_setzero_ps ()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, @@ -8311,7 +8312,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, @@ -8320,7 +8321,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) { return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, @@ -8328,7 +8329,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) { return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, @@ -8337,7 +8338,7 @@ (__mmask8) __U); } -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, @@ -8345,7 +8346,7 @@ (__mmask8) __U); } -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, @@ -8363,7 +8364,7 @@ (__v8hi)_mm_setzero_si128(), \ (__mmask8)(U)) -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) { return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, @@ -8371,7 +8372,7 @@ (__mmask8) __U); } -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A) { return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, @@ -8389,6 +8390,7 @@ (__mmask8)(U)) -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __AVX512VLINTRIN_H */ Index: lib/Headers/avx512vlvbmi2intrin.h =================================================================== --- lib/Headers/avx512vlvbmi2intrin.h +++ lib/Headers/avx512vlvbmi2intrin.h @@ -29,9 +29,10 @@ #define __AVX512VLVBMI2INTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256))) -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, @@ -39,7 +40,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, @@ -47,7 +48,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, @@ -55,7 +56,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, @@ -63,21 +64,21 @@ __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) { __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) { __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, @@ -85,7 +86,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, @@ -93,7 +94,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, @@ -101,7 +102,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, @@ -109,7 +110,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, @@ -117,7 +118,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, @@ -125,7 +126,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, @@ -133,7 +134,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, @@ -141,7 +142,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, @@ -149,7 +150,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, @@ -157,7 +158,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, @@ -165,7 +166,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, @@ -173,21 +174,21 @@ __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) { __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D, __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) { __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D, __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, @@ -195,7 +196,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, @@ -203,7 +204,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, @@ -211,7 +212,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, @@ -219,7 +220,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, @@ -227,7 +228,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, @@ -235,7 +236,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, @@ -243,7 +244,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, @@ -503,7 +504,7 @@ (__v8hi)_mm_undefined_si128(), \ (__mmask8)-1) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, @@ -512,7 +513,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S, @@ -521,7 +522,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, @@ -530,7 +531,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, @@ -539,7 +540,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S, @@ -548,7 +549,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, @@ -557,7 +558,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, @@ -566,7 +567,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S, @@ -575,7 +576,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, @@ -584,7 +585,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, @@ -593,7 +594,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S, @@ -602,7 +603,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, @@ -611,7 +612,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, @@ -620,7 +621,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S, @@ -629,7 +630,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, @@ -638,7 +639,7 @@ (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, @@ -647,7 +648,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S, @@ -656,7 +657,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, @@ -665,7 +666,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, @@ -674,7 +675,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S, @@ -683,7 +684,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, @@ -692,7 +693,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, @@ -701,7 +702,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S, @@ -710,7 +711,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, @@ -719,7 +720,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, @@ -728,7 +729,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S, @@ -737,7 +738,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, @@ -746,7 +747,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, @@ -755,7 +756,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S, @@ -764,7 +765,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, @@ -773,7 +774,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, @@ -782,7 +783,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S, @@ -791,7 +792,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, @@ -800,7 +801,7 @@ (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, @@ -809,7 +810,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S, @@ -818,7 +819,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, @@ -828,6 +829,7 @@ } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avx512vlvnniintrin.h =================================================================== --- lib/Headers/avx512vlvnniintrin.h +++ lib/Headers/avx512vlvnniintrin.h @@ -29,17 +29,18 @@ #define __AVX512VLVNNIINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256))) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, (__v8si)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -47,7 +48,7 @@ (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -55,14 +56,14 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, (__v8si)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -70,7 +71,7 @@ (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -78,14 +79,14 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, (__v8si)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -93,7 +94,7 @@ (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -101,14 +102,14 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, (__v8si)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -116,7 +117,7 @@ (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -124,14 +125,14 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, (__v4si)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -139,7 +140,7 @@ (__v4si)__S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -147,14 +148,14 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, (__v4si)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -162,7 +163,7 @@ (__v4si)__S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -170,14 +171,14 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, (__v4si)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -185,7 +186,7 @@ (__v4si)__S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -193,14 +194,14 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, (__v4si)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -208,7 +209,7 @@ (__v4si)__S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -216,6 +217,7 @@ (__v4si)_mm_setzero_si128()); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avx512vnniintrin.h =================================================================== --- lib/Headers/avx512vnniintrin.h +++ lib/Headers/avx512vnniintrin.h @@ -29,7 +29,7 @@ #define __AVX512VNNIINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS Index: lib/Headers/avx512vpopcntdqintrin.h =================================================================== --- lib/Headers/avx512vpopcntdqintrin.h +++ lib/Headers/avx512vpopcntdqintrin.h @@ -31,8 +31,7 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntd" \ - "q"))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) { return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A); Index: lib/Headers/avx512vpopcntdqvlintrin.h =================================================================== --- lib/Headers/avx512vpopcntdqvlintrin.h +++ lib/Headers/avx512vpopcntdqvlintrin.h @@ -30,69 +30,76 @@ #define __AVX512VPOPCNTDQVLINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(256))) -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi64(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_popcnt_epi64(__m128i __A) { return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128( (__mmask8)__U, (__v2di)_mm_popcnt_epi64(__A), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi32(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_popcnt_epi32(__m128i __A) { return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_popcnt_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi64(__m256i __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_popcnt_epi64(__m256i __A) { return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256( (__mmask8)__U, (__v4di)_mm256_popcnt_epi64(__A), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi32(__m256i __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_popcnt_epi32(__m256i __A) { return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_popcnt_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { return _mm256_mask_popcnt_epi32((__m256i)_mm256_setzero_si256(), __U, __A); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avxintrin.h =================================================================== --- lib/Headers/avxintrin.h +++ lib/Headers/avxintrin.h @@ -50,7 +50,8 @@ typedef long long __m256i __attribute__((__vector_size__(32))); /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(128))) /* Arithmetic */ /// Adds two 256-bit vectors of [4 x double]. @@ -780,7 +781,7 @@ /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. -static __inline __m128d __DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_permutevar_pd(__m128d __a, __m128i __c) { return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); @@ -873,7 +874,7 @@ /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_permutevar_ps(__m128 __a, __m128i __c) { return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); @@ -2540,7 +2541,7 @@ /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the ZF flag in the EFLAGS register. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testz_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b); @@ -2569,7 +2570,7 @@ /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the CF flag in the EFLAGS register. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b); @@ -2599,7 +2600,7 @@ /// \param __b /// A 128-bit vector of [2 x double]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testnzc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b); @@ -2628,7 +2629,7 @@ /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testz_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b); @@ -2657,7 +2658,7 @@ /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testc_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b); @@ -2687,7 +2688,7 @@ /// \param __b /// A 128-bit vector of [4 x float]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testnzc_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b); @@ -2991,7 +2992,7 @@ /// \headerfile /// /// This intrinsic corresponds to the VZEROALL instruction. -static __inline void __DEFAULT_FN_ATTRS +static __inline void __attribute__((__always_inline__, __nodebug__, __target__("avx"))) _mm256_zeroall(void) { __builtin_ia32_vzeroall(); @@ -3002,7 +3003,7 @@ /// \headerfile /// /// This intrinsic corresponds to the VZEROUPPER instruction. -static __inline void __DEFAULT_FN_ATTRS +static __inline void __attribute__((__always_inline__, __nodebug__, __target__("avx"))) _mm256_zeroupper(void) { __builtin_ia32_vzeroupper(); @@ -3021,7 +3022,7 @@ /// The single-precision floating point value to be broadcast. /// \returns A 128-bit vector of [4 x float] whose 32-bit elements are set /// equal to the broadcast value. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_broadcast_ss(float const *__a) { float __f = *__a; @@ -3370,7 +3371,7 @@ /// corresponding value in the memory location is not loaded and the /// corresponding field in the return value is set to zero. /// \returns A 128-bit vector of [2 x double] containing the loaded values. -static __inline __m128d __DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_maskload_pd(double const *__p, __m128i __m) { return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m); @@ -3419,7 +3420,7 @@ /// corresponding value in the memory location is not loaded and the /// corresponding field in the return value is set to zero. /// \returns A 128-bit vector of [4 x float] containing the loaded values. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_maskload_ps(float const *__p, __m128i __m) { return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m); @@ -3492,7 +3493,7 @@ /// changed. /// \param __a /// A 128-bit vector of [2 x double] containing the values to be stored. -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS128 _mm_maskstore_pd(double *__p, __m128i __m, __m128d __a) { __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a); @@ -3540,7 +3541,7 @@ /// changed. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS128 _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) { __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a); @@ -5133,5 +5134,6 @@ } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 #endif /* __AVXINTRIN_H */ Index: lib/Headers/emmintrin.h =================================================================== --- lib/Headers/emmintrin.h +++ lib/Headers/emmintrin.h @@ -45,8 +45,8 @@ typedef signed char __v16qs __attribute__((__vector_size__(16))); /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) -#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), __min_vector_width__(64))) /// Adds lower double-precision values in both operands and returns the /// sum in the lower 64 bits of the result. The upper 64 bits of the result @@ -4129,7 +4129,7 @@ /// A pointer to the 32-bit memory location used to store the value. /// \param __a /// A 32-bit integer containing the value to be stored. -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) _mm_stream_si32(int *__p, int __a) { __builtin_ia32_movnti(__p, __a); @@ -4149,7 +4149,7 @@ /// A pointer to the 64-bit memory location used to store the value. /// \param __a /// A 64-bit integer containing the value to be stored. -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) _mm_stream_si64(long long *__p, long long __a) { __builtin_ia32_movnti64(__p, __a); Index: lib/Headers/f16cintrin.h =================================================================== --- lib/Headers/f16cintrin.h +++ lib/Headers/f16cintrin.h @@ -29,8 +29,10 @@ #define __F16CINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256))) /* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h, * but that's because icc can emulate these without f16c using a library call. @@ -47,7 +49,7 @@ /// \param __a /// A 16-bit half-precision float value. /// \returns The converted 32-bit float value. -static __inline float __DEFAULT_FN_ATTRS +static __inline float __DEFAULT_FN_ATTRS128 _cvtsh_ss(unsigned short __a) { __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0}; @@ -118,7 +120,7 @@ /// A 128-bit vector containing 16-bit half-precision float values. The lower /// 64 bits are used in the conversion. /// \returns A 128-bit vector of [4 x float] containing converted float values. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_cvtph_ps(__m128i __a) { return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a); @@ -162,12 +164,13 @@ /// converted to 32-bit single-precision float values. /// \returns A vector of [8 x float] containing the converted 32-bit /// single-precision float values. -static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) +static __inline __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtph_ps(__m128i __a) { return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __F16CINTRIN_H */ Index: lib/Headers/fma4intrin.h =================================================================== --- lib/Headers/fma4intrin.h +++ lib/Headers/fma4intrin.h @@ -31,200 +31,202 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256))) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __FMA4INTRIN_H */ Index: lib/Headers/fmaintrin.h =================================================================== --- lib/Headers/fmaintrin.h +++ lib/Headers/fmaintrin.h @@ -29,200 +29,202 @@ #define __FMAINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __FMAINTRIN_H */ Index: lib/Headers/gfniintrin.h =================================================================== --- lib/Headers/gfniintrin.h +++ lib/Headers/gfniintrin.h @@ -120,16 +120,17 @@ U, A, B, I) /* Default attributes for simple form (no masking). */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128))) /* Default attributes for YMM unmasked form. */ -#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"))) +#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256))) /* Default attributes for ZMM forms. */ -#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"))) +#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512))) /* Default attributes for VLX forms. */ -#define __DEFAULT_FN_ATTRS_VL __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"))) +#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) @@ -138,7 +139,7 @@ (__v16qi) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 _mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_selectb_128(__U, @@ -146,7 +147,7 @@ (__v16qi) __S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 _mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(), @@ -160,7 +161,7 @@ (__v32qi) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 _mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_selectb_256(__U, @@ -168,7 +169,7 @@ (__v32qi) __S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 _mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(), @@ -200,7 +201,8 @@ #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_Y #undef __DEFAULT_FN_ATTRS_Z -#undef __DEFAULT_FN_ATTRS_VL +#undef __DEFAULT_FN_ATTRS_VL128 +#undef __DEFAULT_FN_ATTRS_VL256 #endif /* __GFNIINTRIN_H */ Index: lib/Headers/mm3dnow.h =================================================================== --- lib/Headers/mm3dnow.h +++ lib/Headers/mm3dnow.h @@ -30,9 +30,9 @@ typedef float __v2sf __attribute__((__vector_size__(8))); /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64))) -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("3dnow"))) _m_femms(void) { __builtin_ia32_femms(); } @@ -134,7 +134,7 @@ /* Handle the 3dnowa instructions here. */ #undef __DEFAULT_FN_ATTRS -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64))) static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pf2iw(__m64 __m) { Index: lib/Headers/mmintrin.h =================================================================== --- lib/Headers/mmintrin.h +++ lib/Headers/mmintrin.h @@ -32,7 +32,7 @@ typedef char __v8qi __attribute__((__vector_size__(8))); /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64))) /// Clears the MMX state by setting the state of the x87 stack registers /// to empty. @@ -41,7 +41,7 @@ /// /// This intrinsic corresponds to the EMMS instruction. /// -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) _mm_empty(void) { __builtin_ia32_emms(); Index: lib/Headers/pmmintrin.h =================================================================== --- lib/Headers/pmmintrin.h +++ lib/Headers/pmmintrin.h @@ -28,7 +28,7 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("sse3"))) + __attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128))) /// Loads data from an unaligned memory location to elements in a 128-bit /// vector. Index: lib/Headers/shaintrin.h =================================================================== --- lib/Headers/shaintrin.h +++ lib/Headers/shaintrin.h @@ -29,7 +29,7 @@ #define __SHAINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"), __min_vector_width__(128))) #define _mm_sha1rnds4_epu32(V1, V2, M) \ __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M)) Index: lib/Headers/smmintrin.h =================================================================== --- lib/Headers/smmintrin.h +++ lib/Headers/smmintrin.h @@ -27,7 +27,7 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), __min_vector_width__(128))) /* SSE4 Rounding macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 Index: lib/Headers/tmmintrin.h =================================================================== --- lib/Headers/tmmintrin.h +++ lib/Headers/tmmintrin.h @@ -27,8 +27,8 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"))) -#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64))) +#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64))) /// Computes the absolute value of each of the packed 8-bit signed /// integers in the source operand and stores the 8-bit unsigned integer Index: lib/Headers/vaesintrin.h =================================================================== --- lib/Headers/vaesintrin.h +++ lib/Headers/vaesintrin.h @@ -29,10 +29,10 @@ #define __VAESINTRIN_H /* Default attributes for YMM forms. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"), __min_vector_width__(256))) /* Default attributes for ZMM forms. */ -#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"))) +#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"), __min_vector_width__(512))) static __inline__ __m256i __DEFAULT_FN_ATTRS Index: lib/Headers/xmmintrin.h =================================================================== --- lib/Headers/xmmintrin.h +++ lib/Headers/xmmintrin.h @@ -40,8 +40,8 @@ #endif /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse"))) -#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse"), __min_vector_width__(64))) /// Adds the 32-bit float values in the low-order bits of the operands. /// Index: lib/Headers/xopintrin.h =================================================================== --- lib/Headers/xopintrin.h +++ lib/Headers/xopintrin.h @@ -31,7 +31,8 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) @@ -201,7 +202,7 @@ return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C)); @@ -765,18 +766,19 @@ return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_frcz_ps(__m256 __A) { return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_frcz_pd(__m256d __A) { return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS256 #endif /* __XOPINTRIN_H */ Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -2942,6 +2942,23 @@ D->addAttr(NewAttr); } +static void handleMinVectorWidthAttr(Sema &S, Decl *D, const AttributeList &AL) { + Expr *E = AL.getArgAsExpr(0); + uint32_t VecWidth; + if (!checkUInt32Argument(S, AL, E, VecWidth)) { + AL.setInvalid(); + return; + } + + MinVectorWidthAttr *Existing = D->getAttr(); + if (Existing && Existing->getVectorWidth() != VecWidth) + S.Diag(AL.getLoc(), diag::warn_duplicate_attribute) << AL.getName(); + + D->addAttr(::new (S.Context) + MinVectorWidthAttr(AL.getRange(), S.Context, VecWidth, + AL.getAttributeSpellingListIndex())); +} + static void handleCleanupAttr(Sema &S, Decl *D, const AttributeList &AL) { Expr *E = AL.getArgAsExpr(0); SourceLocation Loc = E->getExprLoc(); @@ -6129,6 +6146,9 @@ case AttributeList::AT_Target: handleTargetAttr(S, D, AL); break; + case AttributeList::AT_MinVectorWidth: + handleMinVectorWidthAttr(S, D, AL); + break; case AttributeList::AT_Unavailable: handleAttrWithMessage(S, D, AL); break; Index: test/CodeGen/function-min-vector-width.c =================================================================== --- /dev/null +++ test/CodeGen/function-min-vector-width.c @@ -0,0 +1,7 @@ +// This test verifies that we produce min-legal-vector-width attributes + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s + +void __attribute((__min_vector_width__(128))) foo() {} + +// CHECK: "min-legal-vector-width"="128" Index: test/CodeGen/x86-builtins-vector-width.c =================================================================== --- /dev/null +++ test/CodeGen/x86-builtins-vector-width.c @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -emit-llvm %s -o - | FileCheck %s + +typedef signed long long V2LLi __attribute__((vector_size(16))); +typedef signed long long V4LLi __attribute__((vector_size(32))); + +// Make sure builtin forces a min-legal-width attribute +void foo(void) { + V2LLi tmp_V2LLi; + + tmp_V2LLi = __builtin_ia32_undef128(); +} + +// Make sure explicit attribute larger than builtin wins. +void goo(void) __attribute__((__min_vector_width__(256))) { + V2LLi tmp_V2LLi; + + tmp_V2LLi = __builtin_ia32_undef128(); +} + +// Make sure builtin larger than explicit attribute wins. +void hoo(void) __attribute__((__min_vector_width__(128))) { + V4LLi tmp_V4LLi; + + tmp_V4LLi = __builtin_ia32_undef256(); +} + +// CHECK: foo{{.*}} #0 +// CHECK: goo{{.*}} #1 +// CHECK: hoo{{.*}} #1 + +// CHECK: #0 = {{.*}}"min-legal-vector-width"="128" +// CHECK: #1 = {{.*}}"min-legal-vector-width"="256" Index: test/Sema/attr-min-vector-width.c =================================================================== --- /dev/null +++ test/Sema/attr-min-vector-width.c @@ -0,0 +1,8 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s + +int i; +void f(void) __attribute__((__min_vector_width__(i))); /* expected-error {{'__min_vector_width__' attribute requires an integer constant}} */ + +void f2(void) __attribute__((__min_vector_width__(128))); + +void f3(void) __attribute__((__min_vector_width__(128), __min_vector_width__(256))); /* expected-warning {{attribute '__min_vector_width__' is already applied with different parameters}} */