Index: include/clang/Basic/Attr.td =================================================================== --- include/clang/Basic/Attr.td +++ include/clang/Basic/Attr.td @@ -1943,6 +1943,13 @@ }]; } +def MinVectorWidth : InheritableAttr { + let Spellings = [Clang<"min_vector_width">]; + let Args = [UnsignedArgument<"VectorWidth">]; + let Subjects = SubjectList<[Function], ErrorDiag>; + let Documentation = [MinVectorWidthDocs]; +} + def TransparentUnion : InheritableAttr { let Spellings = [GCC<"transparent_union">]; // let Subjects = SubjectList<[Record, TypedefName]>; Index: include/clang/Basic/AttrDocs.td =================================================================== --- include/clang/Basic/AttrDocs.td +++ include/clang/Basic/AttrDocs.td @@ -1496,6 +1496,22 @@ }]; } +def MinVectorWidthDocs : Documentation { + let Category = DocCatFunction; + let Content = [{ +clang support the ``__attribute__((min_vector_width(width)))`` attribute. This +attribute may be attached to a function and informs that backend that this +function desires vectors of at least this width to be generated. Target specific +maximum vector widths still apply. This is attribute it meant to be a hint to +control target heuristics that may generate narrower vectors than what the +target hardware supports. + +This is currently used by the X86 target to allow some CPUs that support 512-bit +vectors to be limited to using 256-bit vectors to avoid frequency penalties. +This attribute can be used to override this behavior on certain functions. +}]; +} + def DocCatAMDGPUAttributes : DocumentationCategory<"AMD GPU Attributes">; def AMDGPUFlatWorkGroupSizeDocs : Documentation { Index: include/clang/Basic/Builtins.h =================================================================== --- include/clang/Basic/Builtins.h +++ include/clang/Basic/Builtins.h @@ -206,6 +206,8 @@ return getRecord(ID).Features; } + unsigned getRequiredVectorWidth(unsigned ID) const; + /// Return true if builtin ID belongs to AuxTarget. bool isAuxBuiltinID(unsigned ID) const { return ID >= (Builtin::FirstTSBuiltin + TSRecords.size()); Index: include/clang/Basic/Builtins.def =================================================================== --- include/clang/Basic/Builtins.def +++ include/clang/Basic/Builtins.def @@ -92,6 +92,7 @@ // e -> const, but only when -fno-math-errno // j -> returns_twice (like setjmp) // u -> arguments are not evaluated for their side-effects +// V:N: -> requires vectors of at least N bits to be legal // FIXME: gcc has nonnull #if defined(BUILTIN) && !defined(LIBBUILTIN) Index: include/clang/Basic/BuiltinsX86.def =================================================================== --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -36,9 +36,9 @@ // Undefined Values // -TARGET_BUILTIN(__builtin_ia32_undef128, "V2d", "nc", "") -TARGET_BUILTIN(__builtin_ia32_undef256, "V4d", "nc", "") -TARGET_BUILTIN(__builtin_ia32_undef512, "V8d", "nc", "") +TARGET_BUILTIN(__builtin_ia32_undef128, "V2d", "ncV:128:", "") +TARGET_BUILTIN(__builtin_ia32_undef256, "V4d", "ncV:256:", "") +TARGET_BUILTIN(__builtin_ia32_undef512, "V8d", "ncV:512:", "") // FLAGS // @@ -48,32 +48,32 @@ // 3DNow! // TARGET_BUILTIN(__builtin_ia32_femms, "v", "n", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pavgusb, "V8cV8cV8c", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pf2id, "V2iV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfacc, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfadd, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfcmpeq, "V2iV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfcmpge, "V2iV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfcmpgt, "V2iV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfmax, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfmin, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfmul, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrcp, "V2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrcpit1, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrcpit2, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrsqrt, "V2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfrsqit1, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfsub, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pfsubr, "V2fV2fV2f", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pi2fd, "V2fV2i", "nc", "3dnow") -TARGET_BUILTIN(__builtin_ia32_pmulhrw, "V4sV4sV4s", "nc", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pavgusb, "V8cV8cV8c", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pf2id, "V2iV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfacc, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfadd, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfcmpeq, "V2iV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfcmpge, "V2iV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfcmpgt, "V2iV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfmax, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfmin, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfmul, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfrcp, "V2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfrcpit1, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfrcpit2, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfrsqrt, "V2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfrsqit1, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfsub, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pfsubr, "V2fV2fV2f", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pi2fd, "V2fV2i", "ncV:64:", "3dnow") +TARGET_BUILTIN(__builtin_ia32_pmulhrw, "V4sV4sV4s", "ncV:64:", "3dnow") // 3DNow! Extensions (3dnowa). -TARGET_BUILTIN(__builtin_ia32_pf2iw, "V2iV2f", "nc", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pfnacc, "V2fV2fV2f", "nc", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pfpnacc, "V2fV2fV2f", "nc", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pi2fw, "V2fV2i", "nc", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pswapdsf, "V2fV2f", "nc", "3dnowa") -TARGET_BUILTIN(__builtin_ia32_pswapdsi, "V2iV2i", "nc", "3dnowa") +TARGET_BUILTIN(__builtin_ia32_pf2iw, "V2iV2f", "ncV:64:", "3dnowa") +TARGET_BUILTIN(__builtin_ia32_pfnacc, "V2fV2fV2f", "ncV:64:", "3dnowa") +TARGET_BUILTIN(__builtin_ia32_pfpnacc, "V2fV2fV2f", "ncV:64:", "3dnowa") +TARGET_BUILTIN(__builtin_ia32_pi2fw, "V2fV2i", "ncV:64:", "3dnowa") +TARGET_BUILTIN(__builtin_ia32_pswapdsf, "V2fV2f", "ncV:64:", "3dnowa") +TARGET_BUILTIN(__builtin_ia32_pswapdsi, "V2iV2i", "ncV:64:", "3dnowa") // MMX // @@ -85,258 +85,258 @@ // doesn't work in the presence of re-declaration of _mm_prefetch for windows. TARGET_BUILTIN(_mm_prefetch, "vcC*i", "nc", "mmx") TARGET_BUILTIN(__builtin_ia32_emms, "v", "n", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddd, "V2iV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddsb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddsw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddusb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_paddusw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubd, "V2iV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubsb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubsw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubusb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psubusw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pmulhw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pmullw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pmaddwd, "V2iV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pand, "V1LLiV1LLiV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pandn, "V1LLiV1LLiV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_por, "V1LLiV1LLiV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pxor, "V1LLiV1LLiV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psllw, "V4sV4sV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pslld, "V2iV2iV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psllq, "V1LLiV1LLiV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrlw, "V4sV4sV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrld, "V2iV2iV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrlq, "V1LLiV1LLiV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psraw, "V4sV4sV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrad, "V2iV2iV1LLi", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psllwi, "V4sV4si", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pslldi, "V2iV2ii", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psllqi, "V1LLiV1LLii", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrlwi, "V4sV4si", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrldi, "V2iV2ii", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrlqi, "V1LLiV1LLii", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psrawi, "V4sV4si", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_psradi, "V2iV2ii", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_packsswb, "V8cV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_packssdw, "V4sV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_packuswb, "V8cV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_punpckhbw, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_punpckhwd, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_punpckhdq, "V2iV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_punpcklbw, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_punpcklwd, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_punpckldq, "V2iV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pcmpeqb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pcmpeqw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pcmpeqd, "V2iV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pcmpgtb, "V8cV8cV8c", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pcmpgtw, "V4sV4sV4s", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_pcmpgtd, "V2iV2iV2i", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_maskmovq, "vV8cV8cc*", "n", "mmx") -TARGET_BUILTIN(__builtin_ia32_movntq, "vV1LLi*V1LLi", "n", "mmx") -TARGET_BUILTIN(__builtin_ia32_vec_init_v2si, "V2iii", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_vec_init_v4hi, "V4sssss", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_vec_init_v8qi, "V8ccccccccc", "nc", "mmx") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v2si, "iV2iIi", "nc", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddd, "V2iV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddsb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddsw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddusb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_paddusw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubd, "V2iV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubsb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubsw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubusb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psubusw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pmulhw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pmullw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pmaddwd, "V2iV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pand, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pandn, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_por, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pxor, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psllw, "V4sV4sV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pslld, "V2iV2iV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psllq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrlw, "V4sV4sV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrld, "V2iV2iV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrlq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psraw, "V4sV4sV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrad, "V2iV2iV1LLi", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psllwi, "V4sV4si", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pslldi, "V2iV2ii", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psllqi, "V1LLiV1LLii", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrlwi, "V4sV4si", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrldi, "V2iV2ii", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrlqi, "V1LLiV1LLii", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psrawi, "V4sV4si", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_psradi, "V2iV2ii", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_packsswb, "V8cV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_packssdw, "V4sV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_packuswb, "V8cV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_punpckhbw, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_punpckhwd, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_punpckhdq, "V2iV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_punpcklbw, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_punpcklwd, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_punpckldq, "V2iV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pcmpeqb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pcmpeqw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pcmpeqd, "V2iV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pcmpgtb, "V8cV8cV8c", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pcmpgtw, "V4sV4sV4s", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_pcmpgtd, "V2iV2iV2i", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_maskmovq, "vV8cV8cc*", "nV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_movntq, "vV1LLi*V1LLi", "nV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_vec_init_v2si, "V2iii", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_vec_init_v4hi, "V4sssss", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_vec_init_v8qi, "V8ccccccccc", "ncV:64:", "mmx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v2si, "iV2ii", "ncV:64:", "mmx") // MMX2 (MMX+SSE) intrinsics -TARGET_BUILTIN(__builtin_ia32_cvtpi2ps, "V4fV4fV2i", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_cvtps2pi, "V2iV4f", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_cvttps2pi, "V2iV4f", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pavgb, "V8cV8cV8c", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pavgw, "V4sV4sV4s", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pmaxsw, "V4sV4sV4s", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pmaxub, "V8cV8cV8c", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pminsw, "V4sV4sV4s", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pminub, "V8cV8cV8c", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_pshufw, "V4sV4sIc", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "iV4sIi", "nc", "mmx,sse") -TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "nc", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_cvtpi2ps, "V4fV4fV2i", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_cvtps2pi, "V2iV4f", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_cvttps2pi, "V2iV4f", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pavgb, "V8cV8cV8c", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pavgw, "V4sV4sV4s", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pmaxsw, "V4sV4sV4s", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pmaxub, "V8cV8cV8c", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pminsw, "V4sV4sV4s", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pminub, "V8cV8cV8c", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_pshufw, "V4sV4sIc", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "iV4sIi", "ncV:64:", "mmx,sse") +TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "ncV:64:", "mmx,sse") // MMX+SSE2 -TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "nc", "mmx,sse2") -TARGET_BUILTIN(__builtin_ia32_cvtpi2pd, "V2dV2i", "nc", "mmx,sse2") -TARGET_BUILTIN(__builtin_ia32_cvttpd2pi, "V2iV2d", "nc", "mmx,sse2") -TARGET_BUILTIN(__builtin_ia32_paddq, "V1LLiV1LLiV1LLi", "nc", "mmx,sse2") -TARGET_BUILTIN(__builtin_ia32_pmuludq, "V1LLiV2iV2i", "nc", "mmx,sse2") -TARGET_BUILTIN(__builtin_ia32_psubq, "V1LLiV1LLiV1LLi", "nc", "mmx,sse2") +TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "ncV:64:", "mmx,sse2") +TARGET_BUILTIN(__builtin_ia32_cvtpi2pd, "V2dV2i", "ncV:64:", "mmx,sse2") +TARGET_BUILTIN(__builtin_ia32_cvttpd2pi, "V2iV2d", "ncV:64:", "mmx,sse2") +TARGET_BUILTIN(__builtin_ia32_paddq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx,sse2") +TARGET_BUILTIN(__builtin_ia32_pmuludq, "V1LLiV2iV2i", "ncV:64:", "mmx,sse2") +TARGET_BUILTIN(__builtin_ia32_psubq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx,sse2") // MMX+SSSE3 -TARGET_BUILTIN(__builtin_ia32_pabsb, "V8cV8c", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsd, "V2iV2i", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsw, "V4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_palignr, "V8cV8cV8cIc", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_phaddd, "V2iV2iV2i", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_phaddsw, "V4sV4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_phaddw, "V4sV4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubd, "V2iV2iV2i", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubsw, "V4sV4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubw, "V4sV4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_pmaddubsw, "V8cV8cV8c", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_pmulhrsw, "V4sV4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_pshufb, "V8cV8cV8c", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_psignw, "V4sV4sV4s", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_psignb, "V8cV8cV8c", "nc", "mmx,ssse3") -TARGET_BUILTIN(__builtin_ia32_psignd, "V2iV2iV2i", "nc", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsb, "V8cV8c", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsd, "V2iV2i", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsw, "V4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_palignr, "V8cV8cV8cIc", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_phaddd, "V2iV2iV2i", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_phaddsw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_phaddw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_phsubd, "V2iV2iV2i", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_phsubsw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_phsubw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_pmaddubsw, "V8cV8cV8c", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_pmulhrsw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_pshufb, "V8cV8cV8c", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_psignw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_psignb, "V8cV8cV8c", "ncV:64:", "mmx,ssse3") +TARGET_BUILTIN(__builtin_ia32_psignd, "V2iV2iV2i", "ncV:64:", "mmx,ssse3") // SSE intrinsics. -TARGET_BUILTIN(__builtin_ia32_comieq, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_comilt, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_comile, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_comigt, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_comige, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_comineq, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomieq, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomilt, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomile, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomigt, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomige, "iV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_ucomineq, "iV4fV4f", "nc", "sse") - -TARGET_BUILTIN(__builtin_ia32_comisdeq, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdlt, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdle, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdgt, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdge, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_comisdneq, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdeq, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdlt, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdle, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdgt, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdge, "iV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_ucomisdneq, "iV2dV2d", "nc", "sse2") - -TARGET_BUILTIN(__builtin_ia32_cmpeqps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpltps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpleps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpunordps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpneqps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpnltps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpnleps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpordps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpeqss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpltss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpless, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpunordss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpneqss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpnltss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpnless, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cmpordss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "nc", "sse") - -TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmplepd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpunordpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpneqpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpnltpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpnlepd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpordpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpeqsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpltsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmplesd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpunordsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_maxsd, "V2dV2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_paddsb128, "V16cV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_paddsw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psubsb128, "V16cV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psubsw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_paddusb128, "V16cV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_paddusw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psubusb128, "V16cV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pminsw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmulhuw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v4si, "iV4iIi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v4sf, "fV4fIi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v8hi, "sV8sIi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_set_v8hi, "V8sV8ssIi", "nc", "sse2") - -TARGET_BUILTIN(__builtin_ia32_addsubps, "V4fV4fV4f", "nc", "sse3") -TARGET_BUILTIN(__builtin_ia32_addsubpd, "V2dV2dV2d", "nc", "sse3") -TARGET_BUILTIN(__builtin_ia32_haddps, "V4fV4fV4f", "nc", "sse3") -TARGET_BUILTIN(__builtin_ia32_haddpd, "V2dV2dV2d", "nc", "sse3") -TARGET_BUILTIN(__builtin_ia32_hsubps, "V4fV4fV4f", "nc", "sse3") -TARGET_BUILTIN(__builtin_ia32_hsubpd, "V2dV2dV2d", "nc", "sse3") -TARGET_BUILTIN(__builtin_ia32_phaddw128, "V8sV8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phaddd128, "V4iV4iV4i", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phaddsw128, "V8sV8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubw128, "V8sV8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubd128, "V4iV4iV4i", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_phsubsw128, "V8sV8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pmaddubsw128, "V8sV16cV16c", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pmulhrsw128, "V8sV8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "nc", "ssse3") -TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "nc", "ssse3") +TARGET_BUILTIN(__builtin_ia32_comieq, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_comilt, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_comile, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_comigt, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_comige, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_comineq, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_ucomieq, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_ucomilt, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_ucomile, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_ucomigt, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_ucomige, "iV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_ucomineq, "iV4fV4f", "ncV:128:", "sse") + +TARGET_BUILTIN(__builtin_ia32_comisdeq, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_comisdlt, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_comisdle, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_comisdgt, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_comisdge, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_comisdneq, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_ucomisdeq, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_ucomisdlt, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_ucomisdle, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_ucomisdgt, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_ucomisdge, "iV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_ucomisdneq, "iV2dV2d", "ncV:128:", "sse2") + +TARGET_BUILTIN(__builtin_ia32_cmpeqps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpltps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpleps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpunordps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpneqps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpnltps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpnleps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpordps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpeqss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpltss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpless, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpunordss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpneqss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpnltss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpnless, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cmpordss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "ncV:128:", "sse") + +TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmplepd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpunordpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpneqpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpnltpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpnlepd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpordpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpeqsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpltsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmplesd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpunordsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_maxsd, "V2dV2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_paddsb128, "V16cV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_paddsw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psubsb128, "V16cV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psubsw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_paddusb128, "V16cV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_paddusw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psubusb128, "V16cV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pminsw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmulhuw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v4si, "iV4iIi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v4sf, "fV4fIi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v8hi, "sV8sIi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_set_v8hi, "V8sV8ssIi", "ncV:128:", "sse2") + +TARGET_BUILTIN(__builtin_ia32_addsubps, "V4fV4fV4f", "ncV:128:", "sse3") +TARGET_BUILTIN(__builtin_ia32_addsubpd, "V2dV2dV2d", "ncV:128:", "sse3") +TARGET_BUILTIN(__builtin_ia32_haddps, "V4fV4fV4f", "ncV:128:", "sse3") +TARGET_BUILTIN(__builtin_ia32_haddpd, "V2dV2dV2d", "ncV:128:", "sse3") +TARGET_BUILTIN(__builtin_ia32_hsubps, "V4fV4fV4f", "ncV:128:", "sse3") +TARGET_BUILTIN(__builtin_ia32_hsubpd, "V2dV2dV2d", "ncV:128:", "sse3") +TARGET_BUILTIN(__builtin_ia32_phaddw128, "V8sV8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_phaddd128, "V4iV4iV4i", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_phaddsw128, "V8sV8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_phsubw128, "V8sV8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_phsubd128, "V4iV4iV4i", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_phsubsw128, "V8sV8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pmaddubsw128, "V8sV16cV16c", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pmulhrsw128, "V8sV8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "ncV:128:", "ssse3") +TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "ncV:128:", "ssse3") TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse") TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh","xmmintrin.h", ALL_LANGUAGES, "sse") TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "n", "sse") TARGET_HEADER_BUILTIN(_mm_getcsr, "Ui", "nh", "xmmintrin.h", ALL_LANGUAGES, "sse") -TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "n", "sse") -TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "n", "sse") -TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "n", "sse") +TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "nV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "nV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "nV:128:", "sse") TARGET_BUILTIN(__builtin_ia32_sfence, "v", "n", "sse") TARGET_HEADER_BUILTIN(_mm_sfence, "v", "nh", "xmmintrin.h", ALL_LANGUAGES, "sse") -TARGET_BUILTIN(__builtin_ia32_rcpps, "V4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_rcpss, "V4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_rsqrtps, "V4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_rsqrtss, "V4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_shufps, "V4fV4fV4fIi", "nc", "sse") - -TARGET_BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "n", "sse2") -TARGET_BUILTIN(__builtin_ia32_movmskpd, "iV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_rcpps, "V4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_rcpss, "V4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_rsqrtps, "V4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_rsqrtss, "V4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_shufps, "V4fV4fV4fIi", "ncV:128:", "sse") + +TARGET_BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "nV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_movmskpd, "iV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_movnti, "vi*i", "n", "sse2") -TARGET_BUILTIN(__builtin_ia32_pshufd, "V4iV4iIi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pshuflw, "V8sV8sIi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pshufhw, "V8sV8sIi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_shufpd, "V2dV2dV2dIi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_pshufd, "V4iV4iIi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pshuflw, "V8sV8sIi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pshufhw, "V8sV8sIi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_shufpd, "V2dV2dV2dIi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "n", "sse2") TARGET_HEADER_BUILTIN(_mm_clflush, "vvC*", "nh", "emmintrin.h", ALL_LANGUAGES, "sse2") TARGET_BUILTIN(__builtin_ia32_lfence, "v", "n", "sse2") @@ -345,342 +345,342 @@ TARGET_HEADER_BUILTIN(_mm_mfence, "v", "nh", "emmintrin.h", ALL_LANGUAGES, "sse2") TARGET_BUILTIN(__builtin_ia32_pause, "v", "n", "") TARGET_HEADER_BUILTIN(_mm_pause, "v", "nh", "emmintrin.h", ALL_LANGUAGES, "") -TARGET_BUILTIN(__builtin_ia32_pmuludq128, "V2LLiV4iV4i", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psraw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrad128, "V4iV4iV4i", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrlw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrld128, "V4iV4iV4i", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrlq128, "V2LLiV2LLiV2LLi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psllw128, "V8sV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pslld128, "V4iV4iV4i", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psllq128, "V2LLiV2LLiV2LLi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psllwi128, "V8sV8si", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pslldi128, "V4iV4ii", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psllqi128, "V2LLiV2LLii", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrlwi128, "V8sV8si", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrldi128, "V4iV4ii", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrlqi128, "V2LLiV2LLii", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrawi128, "V8sV8si", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psradi128, "V4iV4ii", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pmaddwd128, "V4iV8sV8s", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_pslldqi128_byteshift, "V2LLiV2LLiIi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_psrldqi128_byteshift, "V2LLiV2LLiIi", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmuludq128, "V2LLiV4iV4i", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psraw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrad128, "V4iV4iV4i", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrlw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrld128, "V4iV4iV4i", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrlq128, "V2LLiV2LLiV2LLi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psllw128, "V8sV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pslld128, "V4iV4iV4i", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psllq128, "V2LLiV2LLiV2LLi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psllwi128, "V8sV8si", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pslldi128, "V4iV4ii", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psllqi128, "V2LLiV2LLii", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrlwi128, "V8sV8si", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrldi128, "V4iV4ii", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrlqi128, "V2LLiV2LLii", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrawi128, "V8sV8si", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psradi128, "V4iV4ii", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pmaddwd128, "V4iV8sV8s", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_pslldqi128_byteshift, "V2LLiV2LLiIi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_psrldqi128_byteshift, "V2LLiV2LLiIi", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_monitor, "vv*UiUi", "n", "sse3") TARGET_BUILTIN(__builtin_ia32_mwait, "vUiUi", "n", "sse3") -TARGET_BUILTIN(__builtin_ia32_lddqu, "V16ccC*", "n", "sse3") - -TARGET_BUILTIN(__builtin_ia32_palignr128, "V16cV16cV16cIi", "nc", "ssse3") - -TARGET_BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fIc", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pblendw128, "V8sV8sV8sIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_blendpd, "V2dV2dV2dIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_blendps, "V4fV4fV4fIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "nc", "sse4.1") - -TARGET_BUILTIN(__builtin_ia32_pmaxsb128, "V16cV16cV16c", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxsd128, "V4iV4iV4i", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxud128, "V4iV4iV4i", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmaxuw128, "V8sV8sV8s", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminsb128, "V16cV16cV16c", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2LLiV4iV4i", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_roundsd, "V2dV2dV2dIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_roundpd, "V2dV2dIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_dpps, "V4fV4fV4fIc", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_dppd, "V2dV2dV2dIc", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_ptestz128, "iV2LLiV2LLi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16cIc", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_phminposuw128, "V8sV8s", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v16qi, "cV16cIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_vec_set_v16qi, "V16cV16ccIi", "nc", "sse4.1") -TARGET_BUILTIN(__builtin_ia32_vec_set_v4si, "V4iV4iiIi", "nc", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_lddqu, "V16ccC*", "nV:128:", "sse3") + +TARGET_BUILTIN(__builtin_ia32_palignr128, "V16cV16cV16cIi", "ncV:128:", "ssse3") + +TARGET_BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fIc", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pblendw128, "V8sV8sV8sIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_blendpd, "V2dV2dV2dIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_blendps, "V4fV4fV4fIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "ncV:128:", "sse4.1") + +TARGET_BUILTIN(__builtin_ia32_pmaxsb128, "V16cV16cV16c", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pmaxsd128, "V4iV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pmaxud128, "V4iV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pmaxuw128, "V8sV8sV8s", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pminsb128, "V16cV16cV16c", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2LLiV4iV4i", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_roundsd, "V2dV2dV2dIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_roundpd, "V2dV2dIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_dpps, "V4fV4fV4fIc", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_dppd, "V2dV2dV2dIc", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_ptestz128, "iV2LLiV2LLi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16cIc", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_phminposuw128, "V8sV8s", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v16qi, "cV16cIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_vec_set_v16qi, "V16cV16ccIi", "ncV:128:", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_vec_set_v4si, "V4iV4iiIi", "ncV:128:", "sse4.1") // SSE 4.2 -TARGET_BUILTIN(__builtin_ia32_pcmpistrm128, "V16cV16cV16cIc", "nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpistri128, "iV16cV16cIc", "nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestrm128, "V16cV16ciV16ciIc", "nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestri128, "iV16ciV16ciIc","nc", "sse4.2") - -TARGET_BUILTIN(__builtin_ia32_pcmpistria128, "iV16cV16cIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpistric128, "iV16cV16cIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpistrio128, "iV16cV16cIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpistris128, "iV16cV16cIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpistriz128, "iV16cV16cIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestria128, "iV16ciV16ciIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestric128, "iV16ciV16ciIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestrio128, "iV16ciV16ciIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","nc", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpistrm128, "V16cV16cV16cIc", "ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpistri128, "iV16cV16cIc", "ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestrm128, "V16cV16ciV16ciIc", "ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestri128, "iV16ciV16ciIc","ncV:128:", "sse4.2") + +TARGET_BUILTIN(__builtin_ia32_pcmpistria128, "iV16cV16cIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpistric128, "iV16cV16cIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpistrio128, "iV16cV16cIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpistris128, "iV16cV16cIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpistriz128, "iV16cV16cIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestria128, "iV16ciV16ciIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestric128, "iV16ciV16ciIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestrio128, "iV16ciV16ciIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","ncV:128:", "sse4.2") +TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","ncV:128:", "sse4.2") TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "sse4.2") TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "sse4.2") TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "sse4.2") // SSE4a -TARGET_BUILTIN(__builtin_ia32_extrqi, "V2LLiV2LLiIcIc", "nc", "sse4a") -TARGET_BUILTIN(__builtin_ia32_extrq, "V2LLiV2LLiV16c", "nc", "sse4a") -TARGET_BUILTIN(__builtin_ia32_insertqi, "V2LLiV2LLiV2LLiIcIc", "nc", "sse4a") -TARGET_BUILTIN(__builtin_ia32_insertq, "V2LLiV2LLiV2LLi", "nc", "sse4a") -TARGET_BUILTIN(__builtin_ia32_movntsd, "vd*V2d", "n", "sse4a") -TARGET_BUILTIN(__builtin_ia32_movntss, "vf*V4f", "n", "sse4a") +TARGET_BUILTIN(__builtin_ia32_extrqi, "V2LLiV2LLiIcIc", "ncV:128:", "sse4a") +TARGET_BUILTIN(__builtin_ia32_extrq, "V2LLiV2LLiV16c", "ncV:128:", "sse4a") +TARGET_BUILTIN(__builtin_ia32_insertqi, "V2LLiV2LLiV2LLiIcIc", "ncV:128:", "sse4a") +TARGET_BUILTIN(__builtin_ia32_insertq, "V2LLiV2LLiV2LLi", "ncV:128:", "sse4a") +TARGET_BUILTIN(__builtin_ia32_movntsd, "vd*V2d", "nV:128:", "sse4a") +TARGET_BUILTIN(__builtin_ia32_movntss, "vf*V4f", "nV:128:", "sse4a") // AES -TARGET_BUILTIN(__builtin_ia32_aesenc128, "V2LLiV2LLiV2LLi", "nc", "aes") -TARGET_BUILTIN(__builtin_ia32_aesenclast128, "V2LLiV2LLiV2LLi", "nc", "aes") -TARGET_BUILTIN(__builtin_ia32_aesdec128, "V2LLiV2LLiV2LLi", "nc", "aes") -TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2LLiV2LLiV2LLi", "nc", "aes") -TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "nc", "aes") -TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLiIc", "nc", "aes") +TARGET_BUILTIN(__builtin_ia32_aesenc128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes") +TARGET_BUILTIN(__builtin_ia32_aesenclast128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes") +TARGET_BUILTIN(__builtin_ia32_aesdec128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes") +TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes") +TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "ncV:128:", "aes") +TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLiIc", "ncV:128:", "aes") // VAES -TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4LLiV4LLiV4LLi", "nc", "vaes") -TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8LLiV8LLiV8LLi", "nc", "avx512f,vaes") -TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4LLiV4LLiV4LLi", "nc", "vaes") -TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8LLiV8LLiV8LLi", "nc", "avx512f,vaes") -TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4LLiV4LLiV4LLi", "nc", "vaes") -TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8LLiV8LLiV8LLi", "nc", "avx512f,vaes") -TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "nc", "vaes") -TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "nc", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes") // GFNI -TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "nc", "gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "nc", "avx,gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "nc", "avx512bw,gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "nc", "gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "nc", "avx,gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "nc", "avx512bw,gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "nc", "gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "nc", "avx,gfni") -TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "nc", "avx512bw,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512bw,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512bw,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "ncV:128:", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "ncV:256:", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "ncV:512:", "avx512bw,gfni") // CLMUL -TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "nc", "pclmul") +TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "ncV:128:", "pclmul") // VPCLMULQDQ -TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4LLiV4LLiV4LLiIc", "nc", "vpclmulqdq") -TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8LLiV8LLiV8LLiIc", "nc", "avx512f,vpclmulqdq") +TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4LLiV4LLiV4LLiIc", "ncV:256:", "vpclmulqdq") +TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8LLiV8LLiV8LLiIc", "ncV:512:", "avx512f,vpclmulqdq") // AVX -TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_haddpd256, "V4dV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_hsubps256, "V8fV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_hsubpd256, "V4dV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_haddps256, "V8fV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_maxpd256, "V4dV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_maxps256, "V8fV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_minpd256, "V4dV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_minps256, "V8fV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vpermilvarpd, "V2dV2dV2LLi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vpermilvarps, "V4fV4fV4i", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4LLi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vpermilvarps256, "V8fV8fV8i", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_blendpd256, "V4dV4dV4dIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vpermilpd, "V2dV2dIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vpermilps, "V4fV4fIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vpermilpd256, "V4dV4dIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vpermilps256, "V8fV8fIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4iIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_roundpd256, "V4dV4dIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_roundps256, "V8fV8fIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestzpd, "iV2dV2d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestcpd, "iV2dV2d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestnzcpd, "iV2dV2d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestzps, "iV4fV4f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestcps, "iV4fV4f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestnzcps, "iV4fV4f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestzpd256, "iV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestcpd256, "iV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestnzcpd256, "iV4dV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestzps256, "iV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestcps256, "iV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vtestnzcps256, "iV8fV8f", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_ptestz256, "iV4LLiV4LLi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_ptestc256, "iV4LLiV4LLi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_ptestnzc256, "iV4LLiV4LLi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_movmskpd256, "iV4d", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_movmskps256, "iV8f", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_haddpd256, "V4dV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_hsubps256, "V8fV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_hsubpd256, "V4dV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_haddps256, "V8fV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_maxpd256, "V4dV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_maxps256, "V8fV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_minpd256, "V4dV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_minps256, "V8fV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vpermilvarpd, "V2dV2dV2LLi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vpermilvarps, "V4fV4fV4i", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4LLi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vpermilvarps256, "V8fV8fV8i", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_blendpd256, "V4dV4dV4dIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vpermilpd, "V2dV2dIi", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vpermilps, "V4fV4fIi", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vpermilpd256, "V4dV4dIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vpermilps256, "V8fV8fIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4iIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_roundpd256, "V4dV4dIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_roundps256, "V8fV8fIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestzpd, "iV2dV2d", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestcpd, "iV2dV2d", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestnzcpd, "iV2dV2d", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestzps, "iV4fV4f", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestcps, "iV4fV4f", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestnzcps, "iV4fV4f", "ncV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestzpd256, "iV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestcpd256, "iV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestnzcpd256, "iV4dV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestzps256, "iV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestcps256, "iV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vtestnzcps256, "iV8fV8f", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_ptestz256, "iV4LLiV4LLi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_ptestc256, "iV4LLiV4LLi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_ptestnzc256, "iV4LLiV4LLi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_movmskpd256, "iV4d", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_movmskps256, "iV8f", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_vzeroall, "v", "n", "avx") TARGET_BUILTIN(__builtin_ia32_vzeroupper, "v", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2LLi", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4LLi", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskloadps256, "V8fV8fC*V8i", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2LLiV2d", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4iV4f", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4LLiV4d", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8iV8f", "n", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v32qi, "cV32cIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v16hi, "sV16sIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v8si, "iV8iIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_set_v32qi, "V32cV32ccIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_set_v16hi, "V16sV16ssIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "nV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2LLi", "nV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "nV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4LLi", "nV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskloadps256, "V8fV8fC*V8i", "nV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2LLiV2d", "nV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4iV4f", "nV:128:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4LLiV4d", "nV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8iV8f", "nV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v32qi, "cV32cIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v16hi, "sV16sIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v8si, "iV8iIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_set_v32qi, "V32cV32ccIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_set_v16hi, "V16sV16ssIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx") // AVX2 -TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_packusdw256, "V16sV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_paddsb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_paddsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psubsb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psubsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_paddusb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_paddusw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psubusb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pblendw256, "V16sV16sV16sIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_phaddsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_phsubw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16sV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaddwd256, "V8iV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxub256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxuw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxud256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxsb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmaxsd256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminub256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminuw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminud256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminsb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmulhw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pmuludq256, "V4LLiV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psadbw256, "V4LLiV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pshufb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pshufd256, "V8iV8iIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pshuflw256, "V16sV16sIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pshufhw256, "V16sV16sIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psignb256, "V32cV32cV32c", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psignw256, "V16sV16sV16s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psignd256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllwi256, "V16sV16si", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllw256, "V16sV16sV8s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pslldqi256_byteshift, "V4LLiV4LLiIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pslldi256, "V8iV8ii", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pslld256, "V8iV8iV4i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllqi256, "V4LLiV4LLii", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllq256, "V4LLiV4LLiV2LLi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrawi256, "V16sV16si", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psraw256, "V16sV16sV8s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psradi256, "V8iV8ii", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrad256, "V8iV8iV4i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrldqi256_byteshift, "V4LLiV4LLiIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlwi256, "V16sV16si", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlw256, "V16sV16sV8s", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrldi256, "V8iV8ii", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrld256, "V8iV8iV4i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4LLiV4LLii", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pblendd128, "V4iV4iV4iIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_pblendd256, "V8iV8iV8iIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_permdf256, "V4dV4dIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_permdi256, "V4LLiV4LLiIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskloadq256, "V4LLiV4LLiC*V4LLi", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskloadd, "V4iV4iC*V4i", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskloadq, "V2LLiV2LLiC*V2LLi", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskstored256, "vV8i*V8iV8i", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskstoreq256, "vV4LLi*V4LLiV4LLi", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskstored, "vV4i*V4iV4i", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_maskstoreq, "vV2LLi*V2LLiV2LLi", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllv8si, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllv4si, "V4iV4iV4i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllv4di, "V4LLiV4LLiV4LLi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psllv2di, "V2LLiV2LLiV2LLi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrav8si, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrav4si, "V4iV4iV4i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlv8si, "V8iV8iV8i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlv4si, "V4iV4iV4i", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlv4di, "V4LLiV4LLiV4LLi", "nc", "avx2") -TARGET_BUILTIN(__builtin_ia32_psrlv2di, "V2LLiV2LLiV2LLi", "nc", "avx2") +TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_packusdw256, "V16sV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_paddsb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_paddsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psubsb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psubsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_paddusb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_paddusw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psubusb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pblendw256, "V16sV16sV16sIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_phaddsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_phsubw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16sV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaddwd256, "V8iV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxub256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxuw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxud256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxsb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmaxsd256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminub256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminuw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminud256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminsb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmulhw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pmuludq256, "V4LLiV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psadbw256, "V4LLiV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pshufb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pshufd256, "V8iV8iIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pshuflw256, "V16sV16sIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pshufhw256, "V16sV16sIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psignb256, "V32cV32cV32c", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psignw256, "V16sV16sV16s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psignd256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllwi256, "V16sV16si", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllw256, "V16sV16sV8s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pslldqi256_byteshift, "V4LLiV4LLiIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pslldi256, "V8iV8ii", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pslld256, "V8iV8iV4i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllqi256, "V4LLiV4LLii", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllq256, "V4LLiV4LLiV2LLi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrawi256, "V16sV16si", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psraw256, "V16sV16sV8s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psradi256, "V8iV8ii", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrad256, "V8iV8iV4i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrldqi256_byteshift, "V4LLiV4LLiIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlwi256, "V16sV16si", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlw256, "V16sV16sV8s", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrldi256, "V8iV8ii", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrld256, "V8iV8iV4i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4LLiV4LLii", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pblendd128, "V4iV4iV4iIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_pblendd256, "V8iV8iV8iIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_permdf256, "V4dV4dIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_permdi256, "V4LLiV4LLiIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskloadq256, "V4LLiV4LLiC*V4LLi", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskloadd, "V4iV4iC*V4i", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskloadq, "V2LLiV2LLiC*V2LLi", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskstored256, "vV8i*V8iV8i", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskstoreq256, "vV4LLi*V4LLiV4LLi", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskstored, "vV4i*V4iV4i", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_maskstoreq, "vV2LLi*V2LLiV2LLi", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllv8si, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllv4si, "V4iV4iV4i", "ncV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllv4di, "V4LLiV4LLiV4LLi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psllv2di, "V2LLiV2LLiV2LLi", "ncV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrav8si, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrav4si, "V4iV4iV4i", "ncV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlv8si, "V8iV8iV8i", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlv4si, "V4iV4iV4i", "ncV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlv4di, "V4LLiV4LLiV4LLi", "ncV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_psrlv2di, "V2LLiV2LLiV2LLi", "ncV:128:", "avx2") // GATHER -TARGET_BUILTIN(__builtin_ia32_gatherd_pd, "V2dV2ddC*V4iV2dIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherd_pd256, "V4dV4ddC*V4iV4dIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_pd, "V2dV2ddC*V2LLiV2dIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_pd256, "V4dV4ddC*V4LLiV4dIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherd_ps, "V4fV4ffC*V4iV4fIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherd_ps256, "V8fV8ffC*V8iV8fIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_ps, "V4fV4ffC*V2LLiV4fIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_ps256, "V4fV4ffC*V4LLiV4fIc", "n", "avx2") - -TARGET_BUILTIN(__builtin_ia32_gatherd_q, "V2LLiV2LLiLLiC*V4iV2LLiIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherd_q256, "V4LLiV4LLiLLiC*V4iV4LLiIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_q, "V2LLiV2LLiLLiC*V2LLiV2LLiIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_q256, "V4LLiV4LLiLLiC*V4LLiV4LLiIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherd_d, "V4iV4iiC*V4iV4iIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherd_d256, "V8iV8iiC*V8iV8iIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_d, "V4iV4iiC*V2LLiV4iIc", "n", "avx2") -TARGET_BUILTIN(__builtin_ia32_gatherq_d256, "V4iV4iiC*V4LLiV4iIc", "n", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_pd, "V2dV2ddC*V4iV2dIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_pd256, "V4dV4ddC*V4iV4dIc", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_pd, "V2dV2ddC*V2LLiV2dIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_pd256, "V4dV4ddC*V4LLiV4dIc", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_ps, "V4fV4ffC*V4iV4fIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_ps256, "V8fV8ffC*V8iV8fIc", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_ps, "V4fV4ffC*V2LLiV4fIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_ps256, "V4fV4ffC*V4LLiV4fIc", "nV:256:", "avx2") + +TARGET_BUILTIN(__builtin_ia32_gatherd_q, "V2LLiV2LLiLLiC*V4iV2LLiIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_q256, "V4LLiV4LLiLLiC*V4iV4LLiIc", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_q, "V2LLiV2LLiLLiC*V2LLiV2LLiIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_q256, "V4LLiV4LLiLLiC*V4LLiV4LLiIc", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_d, "V4iV4iiC*V4iV4iIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherd_d256, "V8iV8iiC*V8iV8iIc", "nV:256:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_d, "V4iV4iiC*V2LLiV4iIc", "nV:128:", "avx2") +TARGET_BUILTIN(__builtin_ia32_gatherq_d256, "V4iV4iiC*V4LLiV4iIc", "nV:256:", "avx2") // F16C -TARGET_BUILTIN(__builtin_ia32_vcvtps2ph, "V8sV4fIi", "nc", "f16c") -TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256, "V8sV8fIi", "nc", "f16c") -TARGET_BUILTIN(__builtin_ia32_vcvtph2ps, "V4fV8s", "nc", "f16c") -TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256, "V8fV8s", "nc", "f16c") +TARGET_BUILTIN(__builtin_ia32_vcvtps2ph, "V8sV4fIi", "ncV:128:", "f16c") +TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256, "V8sV8fIi", "ncV:256:", "f16c") +TARGET_BUILTIN(__builtin_ia32_vcvtph2ps, "V4fV8s", "ncV:128:", "f16c") +TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256, "V8fV8s", "ncV:256:", "f16c") // RDRAND TARGET_BUILTIN(__builtin_ia32_rdrand16_step, "UiUs*", "n", "rdrnd") @@ -745,109 +745,109 @@ TARGET_BUILTIN(__builtin_ia32_lwpval32, "vUiUiUi", "n", "lwp") // SHA -TARGET_BUILTIN(__builtin_ia32_sha1rnds4, "V4iV4iV4iIc", "nc", "sha") -TARGET_BUILTIN(__builtin_ia32_sha1nexte, "V4iV4iV4i", "nc", "sha") -TARGET_BUILTIN(__builtin_ia32_sha1msg1, "V4iV4iV4i", "nc", "sha") -TARGET_BUILTIN(__builtin_ia32_sha1msg2, "V4iV4iV4i", "nc", "sha") -TARGET_BUILTIN(__builtin_ia32_sha256rnds2, "V4iV4iV4iV4i", "nc", "sha") -TARGET_BUILTIN(__builtin_ia32_sha256msg1, "V4iV4iV4i", "nc", "sha") -TARGET_BUILTIN(__builtin_ia32_sha256msg2, "V4iV4iV4i", "nc", "sha") +TARGET_BUILTIN(__builtin_ia32_sha1rnds4, "V4iV4iV4iIc", "ncV:128:", "sha") +TARGET_BUILTIN(__builtin_ia32_sha1nexte, "V4iV4iV4i", "ncV:128:", "sha") +TARGET_BUILTIN(__builtin_ia32_sha1msg1, "V4iV4iV4i", "ncV:128:", "sha") +TARGET_BUILTIN(__builtin_ia32_sha1msg2, "V4iV4iV4i", "ncV:128:", "sha") +TARGET_BUILTIN(__builtin_ia32_sha256rnds2, "V4iV4iV4iV4i", "ncV:128:", "sha") +TARGET_BUILTIN(__builtin_ia32_sha256msg1, "V4iV4iV4i", "ncV:128:", "sha") +TARGET_BUILTIN(__builtin_ia32_sha256msg2, "V4iV4iV4i", "ncV:128:", "sha") // FMA -TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "nc", "fma") -TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "nc", "fma") -TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "nc", "fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "nc", "fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "nc", "fma|fma4") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "nc", "fma|fma4") - -TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_maskz, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask3, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmsubpd512_mask3, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddps512_maskz, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask3, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmsubps512_mask3, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_maskz, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask3, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd512_mask3, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_maskz, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask3, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmsubaddps512_mask3, "V16fV16fV16fV16fUsIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "ncV:128:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "ncV:128:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "ncV:128:", "fma") +TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "ncV:128:", "fma") +TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "ncV:128:", "fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "ncV:128:", "fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "ncV:128:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "ncV:128:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "ncV:256:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "ncV:256:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "ncV:256:", "fma|fma4") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "ncV:256:", "fma|fma4") + +TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_maskz, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmsubpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddps512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmsubps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_maskz, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmsubaddps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") // XOP -TARGET_BUILTIN(__builtin_ia32_vpmacssww, "V8sV8sV8sV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacsww, "V8sV8sV8sV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacsswd, "V4iV8sV8sV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacswd, "V4iV8sV8sV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacssdd, "V4iV4iV4iV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacsdd, "V4iV4iV4iV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacssdql, "V2LLiV4iV4iV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacsdql, "V2LLiV4iV4iV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacssdqh, "V2LLiV4iV4iV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmacsdqh, "V2LLiV4iV4iV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmadcsswd, "V4iV8sV8sV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpmadcswd, "V4iV8sV8sV4i", "nc", "xop") - -TARGET_BUILTIN(__builtin_ia32_vphaddbw, "V8sV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddbd, "V4iV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddbq, "V2LLiV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddwd, "V4iV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddwq, "V2LLiV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphadddq, "V2LLiV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddubw, "V8sV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddubd, "V4iV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddubq, "V2LLiV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphadduwd, "V4iV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphadduwq, "V2LLiV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphaddudq, "V2LLiV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphsubbw, "V8sV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphsubwd, "V4iV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vphsubdq, "V2LLiV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpperm, "V16cV16cV16cV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotb, "V16cV16cV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotw, "V8sV8sV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotd, "V4iV4iV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotq, "V2LLiV2LLiV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotbi, "V16cV16cIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotwi, "V8sV8sIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotdi, "V4iV4iIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vprotqi, "V2LLiV2LLiIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshlb, "V16cV16cV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshlw, "V8sV8sV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshld, "V4iV4iV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshlq, "V2LLiV2LLiV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshab, "V16cV16cV16c", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshaw, "V8sV8sV8s", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshad, "V4iV4iV4i", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpshaq, "V2LLiV2LLiV2LLi", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomub, "V16cV16cV16cIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomuw, "V8sV8sV8sIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomud, "V4iV4iV4iIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomuq, "V2LLiV2LLiV2LLiIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomb, "V16cV16cV16cIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomw, "V8sV8sV8sIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomd, "V4iV4iV4iIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpcomq, "V2LLiV2LLiV2LLiIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpermil2pd, "V2dV2dV2dV2LLiIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpermil2pd256, "V4dV4dV4dV4LLiIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpermil2ps, "V4fV4fV4fV4iIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vpermil2ps256, "V8fV8fV8fV8iIc", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vfrczss, "V4fV4f", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vfrczsd, "V2dV2d", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vfrczps, "V4fV4f", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vfrczpd, "V2dV2d", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vfrczps256, "V8fV8f", "nc", "xop") -TARGET_BUILTIN(__builtin_ia32_vfrczpd256, "V4dV4d", "nc", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacssww, "V8sV8sV8sV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacsww, "V8sV8sV8sV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacsswd, "V4iV8sV8sV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacswd, "V4iV8sV8sV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacssdd, "V4iV4iV4iV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacsdd, "V4iV4iV4iV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacssdql, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacsdql, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacssdqh, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmacsdqh, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmadcsswd, "V4iV8sV8sV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpmadcswd, "V4iV8sV8sV4i", "ncV:128:", "xop") + +TARGET_BUILTIN(__builtin_ia32_vphaddbw, "V8sV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddbd, "V4iV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddbq, "V2LLiV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddwd, "V4iV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddwq, "V2LLiV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphadddq, "V2LLiV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddubw, "V8sV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddubd, "V4iV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddubq, "V2LLiV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphadduwd, "V4iV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphadduwq, "V2LLiV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphaddudq, "V2LLiV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphsubbw, "V8sV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphsubwd, "V4iV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vphsubdq, "V2LLiV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpperm, "V16cV16cV16cV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotb, "V16cV16cV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotw, "V8sV8sV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotd, "V4iV4iV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotq, "V2LLiV2LLiV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotbi, "V16cV16cIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotwi, "V8sV8sIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotdi, "V4iV4iIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vprotqi, "V2LLiV2LLiIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshlb, "V16cV16cV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshlw, "V8sV8sV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshld, "V4iV4iV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshlq, "V2LLiV2LLiV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshab, "V16cV16cV16c", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshaw, "V8sV8sV8s", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshad, "V4iV4iV4i", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpshaq, "V2LLiV2LLiV2LLi", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomub, "V16cV16cV16cIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomuw, "V8sV8sV8sIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomud, "V4iV4iV4iIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomuq, "V2LLiV2LLiV2LLiIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomb, "V16cV16cV16cIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomw, "V8sV8sV8sIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomd, "V4iV4iV4iIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpcomq, "V2LLiV2LLiV2LLiIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpermil2pd, "V2dV2dV2dV2LLiIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpermil2pd256, "V4dV4dV4dV4LLiIc", "ncV:256:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpermil2ps, "V4fV4fV4fV4iIc", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vpermil2ps256, "V8fV8fV8fV8iIc", "ncV:256:", "xop") +TARGET_BUILTIN(__builtin_ia32_vfrczss, "V4fV4f", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vfrczsd, "V2dV2d", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vfrczps, "V4fV4f", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vfrczpd, "V2dV2d", "ncV:128:", "xop") +TARGET_BUILTIN(__builtin_ia32_vfrczps256, "V8fV8f", "ncV:256:", "xop") +TARGET_BUILTIN(__builtin_ia32_vfrczpd256, "V4dV4d", "ncV:256:", "xop") TARGET_BUILTIN(__builtin_ia32_xbegin, "i", "n", "rtm") TARGET_BUILTIN(__builtin_ia32_xend, "v", "n", "rtm") @@ -866,876 +866,876 @@ TARGET_BUILTIN(__builtin_ia32_wrpkru, "vUi", "n", "pku") // AVX-512 -TARGET_BUILTIN(__builtin_ia32_sqrtpd512, "V8dV8dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_sqrtps512, "V16fV16fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rsqrt14sd_mask, "V2dV2dV2dV2dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "nc", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "nc", "avx512er") - -TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "nc", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "nc", "avx512er") -TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "nc", "avx512er") - -TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvttpd2udq512_mask, "V8iV8dV8iUcIi", "nc", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_cmpps512_mask, "UsV16fV16fIiUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmpps256_mask, "UcV8fV8fIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmpps128_mask, "UcV4fV4fIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmppd512_mask, "UcV8dV8dIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmppd256_mask, "UcV4dV4dIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmppd128_mask, "UcV2dV2dIiUc", "nc", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_rndscaleps_mask, "V16fV16fIiV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rndscalepd_mask, "V8dV8dIiV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtps2dq512_mask, "V16iV16fV16iUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtpd2dq512_mask, "V8iV8dV8iUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtps2udq512_mask, "V16iV16fV16iUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtpd2udq512_mask, "V8iV8dV8iUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_minps512, "V16fV16fV16fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_minpd512, "V8dV8dV8dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_maxps512, "V16fV16fV16fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_maxpd512, "V8dV8dV8dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminsd512, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminud512, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8LLiV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLiLLiC*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loadups512_mask, "V16ffC*V16fUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loadaps512_mask, "V16fV16fC*V16fUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loadupd512_mask, "V8ddC*V8dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loadapd512_mask, "V8dV8dC*V8dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storedqudi512_mask, "vLLi*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storedqusi512_mask, "vi*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storeupd512_mask, "vd*V8dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storeapd512_mask, "vV8d*V8dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_alignq512, "V8LLiV8LLiV8LLiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_alignd512, "V16iV16iV16iIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_alignd128, "V4iV4iV4iIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_alignd256, "V8iV8iV8iIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_alignq128, "V2LLiV2LLiV2LLiIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_alignq256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "nc", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "nc", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "nc", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "nc", "avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni") -TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "nc", "avx512vnni") - -TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4ddC*V4LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div4di, "V4LLiV4LLiLLiC*V4LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div4sf, "V4fV4ffC*V2LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div4si, "V4iV4iiC*V2LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div8sf, "V4fV4ffC*V4LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3div8si, "V4iV4iiC*V4LLiUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv2df, "V2dV2ddC*V4iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv2di, "V2LLiV2LLiLLiC*V4iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv4df, "V4dV4ddC*V4iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv4di, "V4LLiV4LLiLLiC*V4iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv4sf, "V4fV4ffC*V4iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv4si, "V4iV4iiC*V4iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv8sf, "V8fV8ffC*V8iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gather3siv8si, "V8iV8iiC*V8iUcIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8ddC*V8iUcIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16ffC*V16fUsIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8ddC*V8LLiUcIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gatherdiv16sf, "V8fV8ffC*V8LLiUcIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gathersiv8di, "V8LLiV8LLiLLiC*V8iUcIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gathersiv16si, "V16iV16iiC*V16iUsIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gatherdiv8di, "V8LLiV8LLiLLiC*V8LLiUcIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_gatherdiv16si, "V8iV8iiC*V8LLiUcIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scattersiv8df, "vd*UcV8iV8dIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scattersiv16sf, "vf*UsV16iV16fIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scatterdiv8df, "vd*UcV8LLiV8dIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scatterdiv16sf, "vf*UcV8LLiV8fIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scattersiv8di, "vLLi*UcV8iV8LLiIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vi*UsV16iV16iIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scatterdiv8di, "vLLi*UcV8LLiV8LLiIi", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vi*UcV8LLiV8iIi", "n", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_gatherpfdpd, "vUcV8iLLiC*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_gatherpfdps, "vUsV16iiC*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_gatherpfqpd, "vUcV8LLiLLiC*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_gatherpfqps, "vUcV8LLiiC*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iLLi*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16ii*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiLLi*IiIi", "n", "avx512pf") -TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLii*IiIi", "n", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_sqrtpd512, "V8dV8dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_sqrtps512, "V16fV16fIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rsqrt14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er") + +TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er") +TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er") + +TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvttpd2udq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_cmpps512_mask, "UsV16fV16fIiUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmpps256_mask, "UcV8fV8fIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmpps128_mask, "UcV4fV4fIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmppd512_mask, "UcV8dV8dIiUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmppd256_mask, "UcV4dV4dIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmppd128_mask, "UcV2dV2dIiUc", "ncV:128:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_rndscaleps_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rndscalepd_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtpd2udq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_minps512, "V16fV16fV16fIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_minpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_maxps512, "V16fV16fV16fIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_maxpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pminsd512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pminud512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8LLiV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLiLLiC*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadups512_mask, "V16ffC*V16fUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadaps512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadupd512_mask, "V8ddC*V8dUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadapd512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storedqudi512_mask, "vLLi*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storedqusi512_mask, "vi*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storeupd512_mask, "vd*V8dUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storeapd512_mask, "vV8d*V8dUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_alignq512, "V8LLiV8LLiV8LLiIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_alignd512, "V16iV16iV16iIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_alignd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_alignd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_alignq128, "V2LLiV2LLiV2LLiIi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_alignq256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "ncV:512:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni") +TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni") + +TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4ddC*V4LLiUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div4di, "V4LLiV4LLiLLiC*V4LLiUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div4sf, "V4fV4ffC*V2LLiUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div4si, "V4iV4iiC*V2LLiUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div8sf, "V4fV4ffC*V4LLiUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3div8si, "V4iV4iiC*V4LLiUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv2df, "V2dV2ddC*V4iUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv2di, "V2LLiV2LLiLLiC*V4iUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv4df, "V4dV4ddC*V4iUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv4di, "V4LLiV4LLiLLiC*V4iUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv4sf, "V4fV4ffC*V4iUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv4si, "V4iV4iiC*V4iUcIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv8sf, "V8fV8ffC*V8iUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gather3siv8si, "V8iV8iiC*V8iUcIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8ddC*V8iUcIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16ffC*V16fUsIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8ddC*V8LLiUcIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gatherdiv16sf, "V8fV8ffC*V8LLiUcIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gathersiv8di, "V8LLiV8LLiLLiC*V8iUcIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gathersiv16si, "V16iV16iiC*V16iUsIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gatherdiv8di, "V8LLiV8LLiLLiC*V8LLiUcIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_gatherdiv16si, "V8iV8iiC*V8LLiUcIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scattersiv8df, "vd*UcV8iV8dIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scattersiv16sf, "vf*UsV16iV16fIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scatterdiv8df, "vd*UcV8LLiV8dIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scatterdiv16sf, "vf*UcV8LLiV8fIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scattersiv8di, "vLLi*UcV8iV8LLiIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vi*UsV16iV16iIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scatterdiv8di, "vLLi*UcV8LLiV8LLiIi", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vi*UcV8LLiV8iIi", "nV:512:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_gatherpfdpd, "vUcV8iLLiC*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_gatherpfdps, "vUsV16iiC*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_gatherpfqpd, "vUcV8LLiLLiC*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_gatherpfqps, "vUcV8LLiiC*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iLLi*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16ii*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiLLi*IiIi", "nV:512:", "avx512pf") +TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLii*IiIi", "nV:512:", "avx512pf") TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmpq128_mask, "UcV2LLiV2LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmpw128_mask, "UcV8sV8sIiUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_cmpb256_mask, "UiV32cV32cIiUi", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_cmpd256_mask, "UcV8iV8iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmpq256_mask, "UcV4LLiV4LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cmpw256_mask, "UsV16sV16sIiUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_cmpb512_mask, "ULLiV64cV64cIiULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_cmpd512_mask, "UsV16iV16iIiUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmpq512_mask, "UcV8LLiV8LLiIiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmpw512_mask, "UiV32sV32sIiUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_ucmpb128_mask, "UsV16cV16cIiUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_ucmpd128_mask, "UcV4iV4iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_ucmpq128_mask, "UcV2LLiV2LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_ucmpw128_mask, "UcV8sV8sIiUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_ucmpb256_mask, "UiV32cV32cIiUi", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_ucmpd256_mask, "UcV8iV8iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_ucmpq256_mask, "UcV4LLiV4LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_ucmpw256_mask, "UsV16sV16sIiUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_ucmpb512_mask, "ULLiV64cV64cIiULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8LLiV8LLiIiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "nc", "avx512bw") - -TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_paddsb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_paddsw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_paddusb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_paddusw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaxuw512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminsb512, "V64cV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminsw512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psubsb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psubsw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psubusb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psubusw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw") - -TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128_mask, "V2LLiV2LLiV2LLiUc", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256_mask, "V4LLiV4LLiV4LLiUc", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpconflictsi_128_mask, "V4iV4iV4iUc", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpconflictsi_256_mask, "V8iV8iV8iUc", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpconflictdi_512_mask, "V8LLiV8LLiV8LLiUc", "nc", "avx512cd") -TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512_mask, "V16iV16iV16iUs", "nc", "avx512cd") -TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "nc", "avx512cd") -TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8LLiV8LLi", "nc", "avx512cd") - -TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "nc", "avx512vpopcntdq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2LLiV2LLi", "nc", "avx512vpopcntdq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpopcntd_256, "V8iV8i", "nc", "avx512vpopcntdq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "nc", "avx512vpopcntdq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "nc", "avx512vpopcntdq") -TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "nc", "avx512vpopcntdq") - -TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "nc", "avx512vl,avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "nc", "avx512vl,avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "nc", "avx512vl,avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "nc", "avx512vl,avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "nc", "avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "nc", "avx512bitalg") - -TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "nc", "avx512vl,avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "nc", "avx512vl,avx512bitalg") -TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "nc", "avx512bitalg") - -TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "nc", "avx512bw") - -TARGET_BUILTIN(__builtin_ia32_addpd512, "V8dV8dV8dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_addps512, "V16fV16fV16fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_divpd512, "V8dV8dV8dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_divps512, "V16fV16fV16fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_mulpd512, "V8dV8dV8dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_mulps512, "V16fV16fV16fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_subpd512, "V8dV8dV8dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_subps512, "V16fV16fV16fIi", "nc", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_pmaddubsw512, "V32sV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmaddwd512, "V16iV32sV32s", "nc", "avx512bw") - -TARGET_BUILTIN(__builtin_ia32_addss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_divss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_mulss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_subss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_maxss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_minss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_addsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_divsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_mulsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_subsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_maxsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_minsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") - -TARGET_BUILTIN(__builtin_ia32_compressdf128_mask, "V2dV2dV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressdf256_mask, "V4dV4dV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressdi128_mask, "V2LLiV2LLiV2LLiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressdi256_mask, "V4LLiV4LLiV4LLiUc", "nc", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_compresshi128_mask, "V8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compresshi256_mask, "V16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressqi128_mask, "V16cV16cV16cUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressqi256_mask, "V32cV32cV32cUi", "nc", "avx512vl,avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_compresssf128_mask, "V4fV4fV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compresssf256_mask, "V8fV8fV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compresssi128_mask, "V4iV4iV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compresssi256_mask, "V8iV8iV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoredf128_mask, "vV2d*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoredf256_mask, "vV4d*V4dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoredi128_mask, "vV2LLi*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoredi256_mask, "vV4LLi*V4LLiUc", "n", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_compressstorehi128_mask, "vV8s*V8sUc", "n", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressstorehi256_mask, "vV16s*V16sUs", "n", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressstoreqi128_mask, "vV16c*V16cUs", "n", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressstoreqi256_mask, "vV32c*V32cUi", "n", "avx512vl,avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_compressstoresf128_mask, "vV4f*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtpd2ps_mask, "V4fV2dV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtpd2udq128_mask, "V4iV2dV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtpd2udq256_mask, "V4iV4dV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtps2udq128_mask, "V4iV4fV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtps2udq256_mask, "V8iV8fV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvttpd2dq128_mask, "V4iV2dV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvttpd2udq128_mask, "V4iV2dV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvttpd2udq256_mask, "V4iV4dV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2LLiV2LLiV2LLiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expanddi256_mask, "V4LLiV4LLiV4LLiUc", "nc", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_expandhi128_mask, "V8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandhi256_mask, "V16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandqi128_mask, "V16cV16cV16cUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandqi256_mask, "V32cV32cV32cUi", "nc", "avx512vl,avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_expandloaddf128_mask, "V2dV2dC*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandloaddf256_mask, "V4dV4dC*V4dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandloaddi128_mask, "V4iV2LLiC*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandloaddi256_mask, "V4LLiV4LLiC*V4LLiUc", "n", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_expandloadhi128_mask, "V8sV8sC*V8sUc", "n", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandloadhi256_mask, "V16sV16sC*V16sUs", "n", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandloadqi128_mask, "V16cV16cC*V16cUs", "n", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandloadqi256_mask, "V32cV32cC*V32cUi", "n", "avx512vl,avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_expandloadsf128_mask, "V4fV4fC*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandloadsf256_mask, "V8fV8fC*V8fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandloadsi128_mask, "V4iV4iC*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandloadsi256_mask, "V8iV8iC*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandsf128_mask, "V4fV4fV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandsf256_mask, "V8fV8fV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandsi128_mask, "V4iV4iV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_expandsi256_mask, "V8iV8iV8iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rndscaleps_256_mask, "V8fV8fIiV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scalefpd128_mask, "V2dV2dV2dV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scalefpd256_mask, "V4dV4dV4dV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scalefps128_mask, "V4fV4fV4fV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scalefps256_mask, "V8fV8fV8fV8fUc", "nc", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_scatterdiv2df, "vd*UcV2LLiV2dIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv2di, "vLLi*UcV2LLiV2LLiIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv4df, "vd*UcV4LLiV4dIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv4di, "vLLi*UcV4LLiV4LLiIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv4sf, "vf*UcV2LLiV4fIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv4si, "vi*UcV2LLiV4iIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv8sf, "vf*UcV4LLiV4fIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scatterdiv8si, "vi*UcV4LLiV4iIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv2df, "vd*UcV4iV2dIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv2di, "vLLi*UcV4iV2LLiIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv4df, "vd*UcV4iV4dIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv4di, "vLLi*UcV4iV4LLiIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv4sf, "vf*UcV4iV4fIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv4si, "vi*UcV4iV4iIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv8sf, "vf*UcV8iV8fIi", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_scattersiv8si, "vi*UcV8iV8iIi", "n", "avx512vl") - -TARGET_BUILTIN(__builtin_ia32_vpermi2vard128, "V4iV4iV4iV4i", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2vard256, "V8iV8iV8iV8i", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2vard512, "V16iV16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermi2varpd128, "V2dV2dV2LLiV2d", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varpd256, "V4dV4dV4LLiV4d", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512, "V8dV8dV8LLiV8d", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermi2varps128, "V4fV4fV4iV4f", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varps256, "V8fV8fV8iV8f", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varps512, "V16fV16fV16iV16f", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermi2varq128, "V2LLiV2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varq256, "V4LLiV4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varq512, "V8LLiV8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermi2varqi128, "V16cV16cV16cV16c", "nc", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varqi256, "V32cV32cV32cV32c", "nc", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpermi2varqi512, "V64cV64cV64cV64c", "nc", "avx512vbmi") -TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128, "V8sV8sV8sV8s", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256, "V16sV16sV16sV16s", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_vpermi2varhi512, "V32sV32sV32sV32s", "nc", "avx512bw") - -TARGET_BUILTIN(__builtin_ia32_vpshldd128, "V4iV4iV4iIi", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldd256, "V8iV8iV8iIi", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldd512, "V16iV16iV16iIi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldq128, "V2LLiV2LLiV2LLiIi", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldq256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldq512, "V8LLiV8LLiV8LLiIi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldw128, "V8sV8sV8sIi", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldw256, "V16sV16sV16sIi", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldw512, "V32sV32sV32sIi", "nc", "avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_vpshldvd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvd512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvw128_mask, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvw256_mask, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvd128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvd256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvd512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvw128_maskz, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvw256_maskz, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshldvw512_maskz, "V32sV32sV32sV32sUi", "nc", "avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_mask, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_mask, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_maskz, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "nc", "avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_vpshrdd128, "V4iV4iV4iIi", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdd256, "V8iV8iV8iIi", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdd512, "V16iV16iV16iIi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq128, "V2LLiV2LLiV2LLiIi", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdq512, "V8LLiV8LLiV8LLiIi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw128, "V8sV8sV8sIi", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw256, "V16sV16sV16sIi", "nc", "avx512vl,avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_vpshrdw512, "V32sV32sV32sIi", "nc", "avx512vbmi2") - -TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtps2qq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtps2qq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtps2uqq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtps2uqq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtqq2ps128_mask, "V4fV2LLiV4fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtqq2ps256_mask, "V4fV4LLiV4fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttpd2qq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttpd2qq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttps2qq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttps2qq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttps2uqq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttps2uqq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps128_mask, "V4fV2LLiV4fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps256_mask, "V4fV4LLiV4fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangepd128_mask, "V2dV2dV2dIiV2dUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangepd256_mask, "V4dV4dV4dIiV4dUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangeps128_mask, "V4fV4fV4fIiV4fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangeps256_mask, "V8fV8fV8fIiV8fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangesd128_round_mask, "V2dV2dV2dV2dUcIiIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangess128_round_mask, "V4fV4fV4fV4fUcIiIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_reducepd128_mask, "V2dV2dIiV2dUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_reducepd256_mask, "V4dV4dIiV4dUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_reduceps128_mask, "V4fV4fIiV4fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_reduceps256_mask, "V8fV8fIiV8fUc", "nc", "avx512vl,avx512dq") -TARGET_BUILTIN(__builtin_ia32_reducesd_mask, "V2dV2dV2dV2dUcIiIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_reducess_mask, "V4fV4fV4fV4fUcIiIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_pmovswb128_mask, "V16cV8sV16cUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovswb256_mask, "V16cV16sV16cUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovuswb128_mask, "V16cV8sV16cUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovuswb256_mask, "V16cV16sV16cUs", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovwb128_mask, "V16cV8sV16cUc", "nc", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8LLiV8fV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtqq2pd512_mask, "V8dV8LLiV8dUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtqq2ps512_mask, "V8fV8LLiV8fUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttps2qq512_mask, "V8LLiV8fV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvttps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd512_mask, "V8dV8LLiV8dUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps512_mask, "V8fV8LLiV8fUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangepd512_mask, "V8dV8dV8dIiV8dUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_rangeps512_mask, "V16fV16fV16fIiV16fUsIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_reducepd512_mask, "V8dV8dIiV8dUcIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_reduceps512_mask, "V16fV16fIiV16fUsIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_prold512, "V16iV16iIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prolq512, "V8LLiV8LLiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prold128, "V4iV4iIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prold256, "V8iV8iIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prolq128, "V2LLiV2LLiIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prolq256, "V4LLiV4LLiIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prolvd512, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prolvq512, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prord512, "V16iV16iIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prorq512, "V8LLiV8LLiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prolvd128, "V4iV4iV4i", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prolvd256, "V8iV8iV8i", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prolvq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prolvq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prord128, "V4iV4iIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prord256, "V8iV8iIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prorq128, "V2LLiV2LLiIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prorq256, "V4LLiV4LLiIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prorvd512, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prorvq512, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_prorvd128, "V4iV4iV4i", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prorvd256, "V8iV8iV8i", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prorvq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_prorvq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pshufhw512, "V32sV32sIi", "nc ", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pshuflw512, "V32sV32sIi", "nc ", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psllv32hi, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psllw512, "V32sV32sV8s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psllwi512, "V32sV32si", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psllv16hi, "V16sV16sV16s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_psllv8hi, "V8sV8sV8s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_pslldi512, "V16iV16ii", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psllqi512, "V8LLiV8LLii", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrlv32hi, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrlv16hi, "V16sV16sV16s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_psrlv8hi, "V8sV8sV8s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_psrldi512, "V16iV16ii", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrlqi512, "V8LLiV8LLii", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrav32hi, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrav16hi, "V16sV16sV16s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_psrav8hi, "V8sV8sV8s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_psravq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_psravq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_psraw512, "V32sV32sV8s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pslldqi512_byteshift, "V8LLiV8LLiIi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psrldqi512_byteshift, "V8LLiV8LLiIi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_movdqa32store512_mask, "vV16i*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_movdqa64load512_mask, "V8LLiV8LLiC*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_movdqa64store512_mask, "vV8LLi*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_movdqa32store128_mask, "vV4i*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa32store256_mask, "vV8i*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2LLiV2LLiC*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8LLiV8LLiV8LLiV8LLi", "nc", "avx512ifma") -TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8LLiV8LLiV8LLiV8LLi", "nc", "avx512ifma") -TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2LLiV2LLiV2LLiV2LLi", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256, "V4LLiV4LLiV4LLiV4LLi", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2LLiV2LLiV2LLiV2LLi", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4LLiV4LLiV4LLiV4LLi", "nc", "avx512ifma,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmpq128_mask, "UcV2LLiV2LLiIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmpw128_mask, "UcV8sV8sIiUc", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_cmpb256_mask, "UiV32cV32cIiUi", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_cmpd256_mask, "UcV8iV8iIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmpq256_mask, "UcV4LLiV4LLiIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cmpw256_mask, "UsV16sV16sIiUs", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_cmpb512_mask, "ULLiV64cV64cIiULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_cmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmpq512_mask, "UcV8LLiV8LLiIiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_ucmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_ucmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_ucmpq128_mask, "UcV2LLiV2LLiIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_ucmpw128_mask, "UcV8sV8sIiUc", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_ucmpb256_mask, "UiV32cV32cIiUi", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_ucmpd256_mask, "UcV8iV8iIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_ucmpq256_mask, "UcV4LLiV4LLiIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_ucmpw256_mask, "UsV16sV16sIiUs", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_ucmpb512_mask, "ULLiV64cV64cIiULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8LLiV8LLiIiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw") + +TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_paddsb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_paddsw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_paddusb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_paddusw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaxuw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pminsb512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pminsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psubsb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psubsw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psubusb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psubusw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw") + +TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128_mask, "V2LLiV2LLiV2LLiUc", "ncV:128:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256_mask, "V4LLiV4LLiV4LLiUc", "ncV:256:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpconflictsi_128_mask, "V4iV4iV4iUc", "ncV:128:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpconflictsi_256_mask, "V8iV8iV8iUc", "ncV:256:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpconflictdi_512_mask, "V8LLiV8LLiV8LLiUc", "ncV:512:", "avx512cd") +TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512cd") +TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "ncV:512:", "avx512cd") +TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8LLiV8LLi", "ncV:512:", "avx512cd") + +TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "ncV:128:", "avx512vpopcntdq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2LLiV2LLi", "ncV:128:", "avx512vpopcntdq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpopcntd_256, "V8iV8i", "ncV:256:", "avx512vpopcntdq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "ncV:256:", "avx512vpopcntdq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "ncV:512:", "avx512vpopcntdq") +TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "ncV:512:", "avx512vpopcntdq") + +TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "ncV:128:", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "ncV:128:", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "ncV:256:", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "ncV:256:", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "ncV:512:", "avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "ncV:512:", "avx512bitalg") + +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "ncV:128:", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "ncV:256:", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "ncV:512:", "avx512bitalg") + +TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "ncV:512:", "avx512bw") + +TARGET_BUILTIN(__builtin_ia32_addpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_addps512, "V16fV16fV16fIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_divpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_divps512, "V16fV16fV16fIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_mulpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_mulps512, "V16fV16fV16fIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_subpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_subps512, "V16fV16fV16fIi", "ncV:512:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_pmaddubsw512, "V32sV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmaddwd512, "V16iV32sV32s", "ncV:512:", "avx512bw") + +TARGET_BUILTIN(__builtin_ia32_addss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_divss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_mulss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_subss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_maxss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_minss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_addsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_divsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_mulsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_subsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_maxsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_minsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") + +TARGET_BUILTIN(__builtin_ia32_compressdf128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressdf256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressdi128_mask, "V2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressdi256_mask, "V4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_compresshi128_mask, "V8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compresshi256_mask, "V16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressqi128_mask, "V16cV16cV16cUs", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressqi256_mask, "V32cV32cV32cUi", "ncV:256:", "avx512vl,avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_compresssf128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compresssf256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compresssi128_mask, "V4iV4iV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compresssi256_mask, "V8iV8iV8iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoredf128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoredf256_mask, "vV4d*V4dUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoredi128_mask, "vV2LLi*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoredi256_mask, "vV4LLi*V4LLiUc", "nV:256:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_compressstorehi128_mask, "vV8s*V8sUc", "nV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstorehi256_mask, "vV16s*V16sUs", "nV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoreqi128_mask, "vV16c*V16cUs", "nV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoreqi256_mask, "vV32c*V32cUi", "nV:256:", "avx512vl,avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_compressstoresf128_mask, "vV4f*V4fUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtpd2ps_mask, "V4fV2dV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtpd2udq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtpd2udq256_mask, "V4iV4dV4iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtps2udq128_mask, "V4iV4fV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtps2udq256_mask, "V8iV8fV8iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvttpd2dq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvttpd2udq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvttpd2udq256_mask, "V4iV4dV4iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expanddi256_mask, "V4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_expandhi128_mask, "V8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandhi256_mask, "V16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandqi128_mask, "V16cV16cV16cUs", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandqi256_mask, "V32cV32cV32cUi", "ncV:256:", "avx512vl,avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_expandloaddf128_mask, "V2dV2dC*V2dUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandloaddf256_mask, "V4dV4dC*V4dUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandloaddi128_mask, "V4iV2LLiC*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandloaddi256_mask, "V4LLiV4LLiC*V4LLiUc", "nV:256:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_expandloadhi128_mask, "V8sV8sC*V8sUc", "nV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadhi256_mask, "V16sV16sC*V16sUs", "nV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadqi128_mask, "V16cV16cC*V16cUs", "nV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadqi256_mask, "V32cV32cC*V32cUi", "nV:256:", "avx512vl,avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_expandloadsf128_mask, "V4fV4fC*V4fUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandloadsf256_mask, "V8fV8fC*V8fUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandloadsi128_mask, "V4iV4iC*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandloadsi256_mask, "V8iV8iC*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandsf128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandsf256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandsi128_mask, "V4iV4iV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_expandsi256_mask, "V8iV8iV8iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rndscaleps_256_mask, "V8fV8fIiV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scalefpd128_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scalefpd256_mask, "V4dV4dV4dV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scalefps128_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scalefps256_mask, "V8fV8fV8fV8fUc", "ncV:256:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_scatterdiv2df, "vd*UcV2LLiV2dIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv2di, "vLLi*UcV2LLiV2LLiIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv4df, "vd*UcV4LLiV4dIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv4di, "vLLi*UcV4LLiV4LLiIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv4sf, "vf*UcV2LLiV4fIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv4si, "vi*UcV2LLiV4iIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv8sf, "vf*UcV4LLiV4fIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scatterdiv8si, "vi*UcV4LLiV4iIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv2df, "vd*UcV4iV2dIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv2di, "vLLi*UcV4iV2LLiIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv4df, "vd*UcV4iV4dIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv4di, "vLLi*UcV4iV4LLiIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv4sf, "vf*UcV4iV4fIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv4si, "vi*UcV4iV4iIi", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv8sf, "vf*UcV8iV8fIi", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_scattersiv8si, "vi*UcV8iV8iIi", "nV:256:", "avx512vl") + +TARGET_BUILTIN(__builtin_ia32_vpermi2vard128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2vard256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2vard512, "V16iV16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermi2varpd128, "V2dV2dV2LLiV2d", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varpd256, "V4dV4dV4LLiV4d", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512, "V8dV8dV8LLiV8d", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermi2varps128, "V4fV4fV4iV4f", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varps256, "V8fV8fV8iV8f", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varps512, "V16fV16fV16iV16f", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermi2varq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermi2varqi128, "V16cV16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varqi256, "V32cV32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpermi2varqi512, "V64cV64cV64cV64c", "ncV:512:", "avx512vbmi") +TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128, "V8sV8sV8sV8s", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256, "V16sV16sV16sV16s", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_vpermi2varhi512, "V32sV32sV32sV32s", "ncV:512:", "avx512bw") + +TARGET_BUILTIN(__builtin_ia32_vpshldd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldd512, "V16iV16iV16iIi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq128, "V2LLiV2LLiV2LLiIi", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldq512, "V8LLiV8LLiV8LLiIi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw128, "V8sV8sV8sIi", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw256, "V16sV16sV16sIi", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldw512, "V32sV32sV32sIi", "ncV:512:", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_vpshldvd128_mask, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd256_mask, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd512_mask, "V16iV16iV16iV16iUs", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw128_mask, "V8sV8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw256_mask, "V16sV16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd128_maskz, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd256_maskz, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvd512_maskz, "V16iV16iV16iV16iUs", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw128_maskz, "V8sV8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw256_maskz, "V16sV16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshldvw512_maskz, "V32sV32sV32sV32sUi", "ncV:512:", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_mask, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_mask, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_mask, "V16iV16iV16iV16iUs", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_mask, "V8sV8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_mask, "V16sV16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_maskz, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_maskz, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_maskz, "V16iV16iV16iV16iUs", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_maskz, "V8sV8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "ncV:512:", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_vpshrdd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdd512, "V16iV16iV16iIi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq128, "V2LLiV2LLiV2LLiIi", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdq512, "V8LLiV8LLiV8LLiIi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw128, "V8sV8sV8sIi", "ncV:128:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw256, "V16sV16sV16sIi", "ncV:256:", "avx512vl,avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_vpshrdw512, "V32sV32sV32sIi", "ncV:512:", "avx512vbmi2") + +TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtps2qq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtps2qq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtps2uqq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtps2uqq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtqq2ps128_mask, "V4fV2LLiV4fUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtqq2ps256_mask, "V4fV4LLiV4fUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttpd2qq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttpd2qq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttps2qq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttps2qq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttps2uqq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttps2uqq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps128_mask, "V4fV2LLiV4fUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps256_mask, "V4fV4LLiV4fUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangepd128_mask, "V2dV2dV2dIiV2dUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangepd256_mask, "V4dV4dV4dIiV4dUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangeps128_mask, "V4fV4fV4fIiV4fUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangeps256_mask, "V8fV8fV8fIiV8fUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangesd128_round_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangess128_round_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_reducepd128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_reducepd256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_reduceps128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_reduceps256_mask, "V8fV8fIiV8fUc", "ncV:256:", "avx512vl,avx512dq") +TARGET_BUILTIN(__builtin_ia32_reducesd_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_reducess_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_pmovswb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovswb256_mask, "V16cV16sV16cUs", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovuswb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovuswb256_mask, "V16cV16sV16cUs", "ncV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovwb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtqq2pd512_mask, "V8dV8LLiV8dUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtqq2ps512_mask, "V8fV8LLiV8fUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttps2qq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvttps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd512_mask, "V8dV8LLiV8dUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps512_mask, "V8fV8LLiV8fUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangepd512_mask, "V8dV8dV8dIiV8dUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_rangeps512_mask, "V16fV16fV16fIiV16fUsIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_reducepd512_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_reduceps512_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_prold512, "V16iV16iIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prolq512, "V8LLiV8LLiIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prold128, "V4iV4iIi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prold256, "V8iV8iIi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prolq128, "V2LLiV2LLiIi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prolq256, "V4LLiV4LLiIi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prolvd512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prolvq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prord512, "V16iV16iIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prorq512, "V8LLiV8LLiIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prolvd128, "V4iV4iV4i", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prolvd256, "V8iV8iV8i", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prolvq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prolvq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prord128, "V4iV4iIi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prord256, "V8iV8iIi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prorq128, "V2LLiV2LLiIi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prorq256, "V4LLiV4LLiIi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prorvd512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prorvq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_prorvd128, "V4iV4iV4i", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prorvd256, "V8iV8iV8i", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prorvq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_prorvq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pshufhw512, "V32sV32sIi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pshuflw512, "V32sV32sIi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psllv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psllw512, "V32sV32sV8s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psllwi512, "V32sV32si", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psllv16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_psllv8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_pslldi512, "V16iV16ii", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psllqi512, "V8LLiV8LLii", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrlv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrlv16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_psrlv8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_psrldi512, "V16iV16ii", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrlqi512, "V8LLiV8LLii", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrav32hi, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrav16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_psrav8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_psravq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_psravq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_psraw512, "V32sV32sV8s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pslldqi512_byteshift, "V8LLiV8LLiIi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psrldqi512_byteshift, "V8LLiV8LLiIi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_movdqa32store512_mask, "vV16i*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_movdqa64load512_mask, "V8LLiV8LLiC*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_movdqa64store512_mask, "vV8LLi*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_movdqa32store128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa32store256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2LLiV2LLiC*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512ifma") +TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512ifma") +TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512ifma,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512ifma,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512ifma,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512ifma,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_kunpckdi, "ULLiULLiULLi", "nc", "avx512bw") TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32s*V32sUi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64c*V64cULLi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8LLiIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8LLiIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmps512_mask, "V16fV16fV16fV16iIiUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmps512_maskz, "V16fV16fV16fV16iIiUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmsd_mask, "V2dV2dV2dV2LLiIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmsd_maskz, "V2dV2dV2dV2LLiIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmss_mask, "V4fV4fV4fV4iIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_fixupimmss_maskz, "V4fV4fV4fV4iIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getexpsd128_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getexpss128_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getmantsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getmantss_round_mask, "V4fV4fV4fIiV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loaddquhi128_mask, "V8sV8s*V8sUc", "n", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddquhi256_mask, "V16sV16s*V16sUs", "n", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddquqi128_mask, "V16cV16c*V16cUs", "n", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddquqi256_mask, "V32cV32c*V32cUi", "n", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_mask, "V2dV2dV2dV2LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_maskz, "V2dV2dV2dV2LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_mask, "V4dV4dV4dV4LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_maskz, "V4dV4dV4dV4LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmps128_mask, "V4fV4fV4fV4iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmps128_maskz, "V4fV4fV4fV4iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V2dV2d*V2dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V4fV4f*V4fUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddqusi128_mask, "V4iV4i*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loaddqusi256_mask, "V8iV8i*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadupd128_mask, "V2dV2d*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4d*V4dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4f*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_loadups256_mask, "V8fV8f*V8fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storedquhi512_mask, "vV32s*V32sUi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_storedquqi512_mask, "vV64c*V64cULLi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_storedquhi128_mask, "vV8s*V8sUc", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_storedquhi256_mask, "vV16s*V16sUs", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV2d*V2dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV4f*V4fUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storedqusi128_mask, "vV4i*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storedqusi256_mask, "vV8i*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storeupd128_mask, "vV2d*V2dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storeupd256_mask, "vV4d*V4dUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storeups128_mask, "vV4f*V4fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_storeups256_mask, "vV8f*V8fUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rcp14pd128_mask, "V2dV2dV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rcp14pd256_mask, "V4dV4dV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rcp14ps128_mask, "V4fV4fV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rcp14ps256_mask, "V8fV8fV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vplzcntd_128, "V4iV4i", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vplzcntd_256, "V8iV8i", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vplzcntq_128, "V2LLiV2LLi", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vplzcntq_256, "V4LLiV4LLi", "nc", "avx512cd,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vcvtsd2si32, "iV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi32, "UiV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtss2si32, "iV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtss2usi32, "UiV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttsd2si32, "iV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi32, "UiV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttss2si32, "iV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttss2usi32, "UiV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermilpd512, "V8dV8dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermilps512, "V16fV16fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512, "V8dV8dV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpermilvarps512, "V16fV16fV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rndscalesd_round_mask, "V2dV2dV2dV2dUcIiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rndscaless_round_mask, "V4fV4fV4fV4fUcIiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scalefpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scalefps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scalefsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_scalefss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psradi512, "V16iV16ii", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psraqi512, "V8LLiV8LLii", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psraq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_psraq256, "V4LLiV4LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_psraqi128, "V2LLiV2LLii", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_psraqi256, "V4LLiV4LLii", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pslld512, "V16iV16iV4i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psllq512, "V8LLiV8LLiV2LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psllv16si, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psllv8di, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrad512, "V16iV16iV4i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psraq512, "V8LLiV8LLiV2LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrav16si, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrav8di, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrld512, "V16iV16iV4i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrlq512, "V8LLiV8LLiV2LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrlv16si, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_psrlv8di, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pternlogd512_mask, "V16iV16iV16iV16iIiUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pternlogd512_maskz, "V16iV16iV16iV16iIiUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pternlogq512_mask, "V8LLiV8LLiV8LLiV8LLiIiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pternlogq512_maskz, "V8LLiV8LLiV8LLiV8LLiIiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pternlogd128_mask, "V4iV4iV4iV4iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogd128_maskz, "V4iV4iV4iV4iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogd256_mask, "V8iV8iV8iV8iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogd256_maskz, "V8iV8iV8iV8iIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogq128_mask, "V2LLiV2LLiV2LLiV2LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogq128_maskz, "V2LLiV2LLiV2LLiV2LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogq256_mask, "V4LLiV4LLiV4LLiV4LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pternlogq256_maskz, "V4LLiV4LLiV4LLiV4LLiIiUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_shuf_f32x4, "V16fV16fV16fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_shuf_f64x2, "V8dV8dV8dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_shuf_i32x4, "V16iV16iV16iIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_shuf_i64x2, "V8LLiV8LLiV8LLiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_shufpd512, "V8dV8dV8dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_shufps512, "V16fV16fV16fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256, "V8fV8fV8fIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256, "V4dV4dV4dIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256, "V8iV8iV8iIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_sqrtsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_sqrtss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_rsqrt14pd128_mask, "V2dV2dV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rsqrt14pd256_mask, "V4dV4dV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rsqrt14ps128_mask, "V4fV4fV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_rsqrt14ps256_mask, "V8fV8fV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtb2mask512, "ULLiV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_cvtmask2b512, "V64cULLi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_cvtmask2w512, "V32sUi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_cvtd2mask512, "UsV16i", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtmask2d512, "V16iUs", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtmask2q512, "V8LLiUc", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtq2mask512, "UcV8LLi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_cvtb2mask128, "UsV16c", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtb2mask256, "UiV32c", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2b128, "V16cUs", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2b256, "V32cUi", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2w128, "V8sUc", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2w256, "V16sUs", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtd2mask128, "UcV4i", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtd2mask256, "UcV8i", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2d128, "V4iUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2d256, "V8iUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2q128, "V2LLiUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtmask2q256, "V4LLiUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtq2mask128, "UcV2LLi", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4LLi", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovsdw512_mask, "V16sV16iV16sUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsdw512mem_mask, "vV16s*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsqd512_mask, "V8iV8LLiV8iUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovsdb128_mask, "V16cV4iV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsdb128mem_mask, "vV16c*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovswb128mem_mask, "vV16c*V8sUc", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovsdb256_mask, "V16cV8iV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsdb256mem_mask, "vV16c*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovswb256mem_mask, "vV16c*V16sUs", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovsdw128_mask, "V8sV4iV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqb128_mask, "V16cV2LLiV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqb128mem_mask, "vV16c*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqb256_mask, "V16cV4LLiV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqb256mem_mask, "vV16c*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqd128_mask, "V4iV2LLiV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqd128mem_mask, "vV4i*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqd256_mask, "V4iV4LLiV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqd256mem_mask, "vV4i*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqw128_mask, "V8sV2LLiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovsqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusdb512_mask, "V16cV16iV16cUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusdb512mem_mask, "vV16c*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovuswb512mem_mask, "vV32c*V32sUi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovusdw512_mask, "V16sV16iV16sUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusdw512mem_mask, "vV16s*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusqd512_mask, "V8iV8LLiV8iUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovusdb128_mask, "V16cV4iV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusdb128mem_mask, "vV16c*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovuswb128mem_mask, "vV16c*V8sUc", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovusdb256_mask, "V16cV8iV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusdb256mem_mask, "vV16c*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovuswb256mem_mask, "vV16c*V16sUs", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovusdw128_mask, "V8sV4iV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2LLiV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqb128mem_mask, "vV16c*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqb256_mask, "V16cV4LLiV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqb256mem_mask, "vV16c*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqd128_mask, "V4iV2LLiV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqd128mem_mask, "vV4i*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqd256_mask, "V4iV4LLiV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqd256mem_mask, "vV4i*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqw128_mask, "V8sV2LLiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "n", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8LLiV8iUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovdb128mem_mask, "vV16c*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdb256_mask, "V16cV8iV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdb256mem_mask, "vV16c*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovwb256mem_mask, "vV16c*V16sUs", "n", "avx512vl,avx512bw") -TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2LLiV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, "vV16c*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqb256_mask, "V16cV4LLiV16cUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqb256mem_mask, "vV16c*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqd128_mask, "V4iV2LLiV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqd128mem_mask, "vV4i*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqd256mem_mask, "vV4i*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2LLiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2LLiV8LLiIiV2LLiUc", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4LLiV8LLiIiV4LLiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2LLiV4LLiIiV2LLiUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_insertf32x8, "V16fV16fV8fIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_insertf64x2_512, "V8dV8dV2dIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_inserti32x8, "V16iV16iV8iIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_inserti64x2_512, "V8LLiV8LLiV2LLiIi", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_insertf64x4, "V8dV8dV4dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_inserti64x4, "V8LLiV8LLiV4LLiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_insertf64x2_256, "V4dV4dV2dIi", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_inserti64x2_256, "V4LLiV4LLiV2LLiIi", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_insertf32x4_256, "V8fV8fV4fIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_inserti32x4_256, "V8iV8iV4iIi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_insertf32x4, "V16fV16fV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_inserti32x4, "V16iV16iV4iIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getmantpd128_mask, "V2dV2dIiV2dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getmantpd256_mask, "V4dV4dIiV4dUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getmantps128_mask, "V4fV4fIiV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getmantps256_mask, "V8fV8fIiV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_getmantpd512_mask, "V8dV8dIiV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getmantps512_mask, "V16fV16fIiV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getexppd512_mask, "V8dV8dV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_getexpps512_mask, "V16fV16fV16fUsIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddss3_maskz, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask3, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_maskz, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask3, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vfnmsubss3_mask3, "V4fV4fV4fV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permdf512, "V8dV8dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permdi512, "V8LLiV8LLiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvarhi512, "V32sV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8LLiV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvarsf512, "V16fV16fV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvarsi512, "V16iV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_permvarqi512, "V64cV64cV64c", "nc", "avx512vbmi") -TARGET_BUILTIN(__builtin_ia32_permvarqi128, "V16cV16cV16c", "nc", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvarqi256, "V32cV32cV32c", "nc", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvarhi128, "V8sV8sV8s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvarhi256, "V16sV16sV16s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvardf256, "V4dV4dV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_permvardi256, "V4LLiV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_fpclassps256_mask, "UcV8fIiUc", "nc", "avx512dq,avx512vl") -TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "nc", "avx512dq") -TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "nc", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32s*V32sUi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64c*V64cULLi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8LLiIiUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8LLiIiUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmps512_mask, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmps512_maskz, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmsd_mask, "V2dV2dV2dV2LLiIiUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmsd_maskz, "V2dV2dV2dV2LLiIiUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmss_mask, "V4fV4fV4fV4iIiUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_fixupimmss_maskz, "V4fV4fV4fV4iIiUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getexpsd128_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getexpss128_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getmantsd_round_mask, "V2dV2dV2dIiV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getmantss_round_mask, "V4fV4fV4fIiV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loaddquhi128_mask, "V8sV8s*V8sUc", "nV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddquhi256_mask, "V16sV16s*V16sUs", "nV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddquqi128_mask, "V16cV16c*V16cUs", "nV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddquqi256_mask, "V32cV32c*V32cUi", "nV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_mask, "V2dV2dV2dV2LLiIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_maskz, "V2dV2dV2dV2LLiIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_mask, "V4dV4dV4dV4LLiIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_maskz, "V4dV4dV4dV4LLiIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmps128_mask, "V4fV4fV4fV4iIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmps128_maskz, "V4fV4fV4fV4iIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V2dV2d*V2dUc", "nV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V4fV4f*V4fUc", "nV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddqusi128_mask, "V4iV4i*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loaddqusi256_mask, "V8iV8i*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadupd128_mask, "V2dV2d*V2dUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4d*V4dUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4f*V4fUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_loadups256_mask, "V8fV8f*V8fUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storedquhi512_mask, "vV32s*V32sUi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_storedquqi512_mask, "vV64c*V64cULLi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_storedquhi128_mask, "vV8s*V8sUc", "nV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_storedquhi256_mask, "vV16s*V16sUs", "nV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs", "nV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi", "nV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV2d*V2dUc", "nV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV4f*V4fUc", "nV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storedqusi128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storedqusi256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storeupd128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storeupd256_mask, "vV4d*V4dUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storeups128_mask, "vV4f*V4fUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_storeups256_mask, "vV8f*V8fUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rcp14pd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rcp14pd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rcp14ps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rcp14ps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vplzcntd_128, "V4iV4i", "ncV:128:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vplzcntd_256, "V8iV8i", "ncV:256:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vplzcntq_128, "V2LLiV2LLi", "ncV:128:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vplzcntq_256, "V4LLiV4LLi", "ncV:256:", "avx512cd,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtsd2si32, "iV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi32, "UiV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtss2si32, "iV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtss2usi32, "UiV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttsd2si32, "iV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi32, "UiV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttss2si32, "iV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttss2usi32, "UiV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermilpd512, "V8dV8dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermilps512, "V16fV16fIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512, "V8dV8dV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpermilvarps512, "V16fV16fV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rndscalesd_round_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rndscaless_round_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scalefpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scalefps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scalefsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_scalefss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psradi512, "V16iV16ii", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psraqi512, "V8LLiV8LLii", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psraq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_psraq256, "V4LLiV4LLiV2LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_psraqi128, "V2LLiV2LLii", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_psraqi256, "V4LLiV4LLii", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pslld512, "V16iV16iV4i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psllq512, "V8LLiV8LLiV2LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psllv16si, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psllv8di, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrad512, "V16iV16iV4i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psraq512, "V8LLiV8LLiV2LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrav16si, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrav8di, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrld512, "V16iV16iV4i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrlq512, "V8LLiV8LLiV2LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrlv16si, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_psrlv8di, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pternlogd512_mask, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pternlogd512_maskz, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pternlogq512_mask, "V8LLiV8LLiV8LLiV8LLiIiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pternlogq512_maskz, "V8LLiV8LLiV8LLiV8LLiIiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pternlogd128_mask, "V4iV4iV4iV4iIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogd128_maskz, "V4iV4iV4iV4iIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogd256_mask, "V8iV8iV8iV8iIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogd256_maskz, "V8iV8iV8iV8iIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogq128_mask, "V2LLiV2LLiV2LLiV2LLiIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogq128_maskz, "V2LLiV2LLiV2LLiV2LLiIiUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogq256_mask, "V4LLiV4LLiV4LLiV4LLiIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pternlogq256_maskz, "V4LLiV4LLiV4LLiV4LLiIiUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_shuf_f32x4, "V16fV16fV16fIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_shuf_f64x2, "V8dV8dV8dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_shuf_i32x4, "V16iV16iV16iIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_shuf_i64x2, "V8LLiV8LLiV8LLiIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_shufpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_shufps512, "V16fV16fV16fIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256, "V8fV8fV8fIi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256, "V4dV4dV4dIi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256, "V8iV8iV8iIi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_sqrtsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_sqrtss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_rsqrt14pd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rsqrt14pd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rsqrt14ps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_rsqrt14ps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtb2mask512, "ULLiV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_cvtmask2b512, "V64cULLi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_cvtmask2w512, "V32sUi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_cvtd2mask512, "UsV16i", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtmask2d512, "V16iUs", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtmask2q512, "V8LLiUc", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtq2mask512, "UcV8LLi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_cvtb2mask128, "UsV16c", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtb2mask256, "UiV32c", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2b128, "V16cUs", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2b256, "V32cUi", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2w128, "V8sUc", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2w256, "V16sUs", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtd2mask128, "UcV4i", "ncV:128:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtd2mask256, "UcV8i", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2d128, "V4iUc", "ncV:128:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2d256, "V8iUc", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2q128, "V2LLiUc", "ncV:128:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtmask2q256, "V4LLiUc", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtq2mask128, "UcV2LLi", "ncV:128:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4LLi", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovsdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8LLiV16cUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqb512mem_mask, "vV16c*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqd512_mask, "V8iV8LLiV8iUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqd512mem_mask, "vV8i*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqw512_mask, "V8sV8LLiV8sUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovsdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovswb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovsdb256_mask, "V16cV8iV16cUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdb256mem_mask, "vV16c*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovswb256mem_mask, "vV16c*V16sUs", "nV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovsdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqb128_mask, "V16cV2LLiV16cUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqb128mem_mask, "vV16c*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqb256_mask, "V16cV4LLiV16cUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqb256mem_mask, "vV16c*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqd128_mask, "V4iV2LLiV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqd128mem_mask, "vV4i*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqd256_mask, "V4iV4LLiV4iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqd256mem_mask, "vV4i*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqw128_mask, "V8sV2LLiV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqw128mem_mask, "vV8s*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqw256_mask, "V8sV4LLiV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovsqw256mem_mask, "vV8s*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovuswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovusdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8LLiV16cUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqb512mem_mask, "vV16c*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqd512_mask, "V8iV8LLiV8iUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqd512mem_mask, "vV8i*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqw512_mask, "V8sV8LLiV8sUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovusdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovuswb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovusdb256_mask, "V16cV8iV16cUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdb256mem_mask, "vV16c*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovuswb256mem_mask, "vV16c*V16sUs", "nV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovusdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2LLiV16cUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqb128mem_mask, "vV16c*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqb256_mask, "V16cV4LLiV16cUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqb256mem_mask, "vV16c*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqd128_mask, "V4iV2LLiV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqd128mem_mask, "vV4i*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqd256_mask, "V4iV4LLiV4iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqd256mem_mask, "vV4i*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqw128_mask, "V8sV2LLiV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4LLiV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8LLiV8iUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdb256_mask, "V16cV8iV16cUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdb256mem_mask, "vV16c*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovwb256mem_mask, "vV16c*V16sUs", "nV:256:", "avx512vl,avx512bw") +TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2LLiV16cUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, "vV16c*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqb256_mask, "V16cV4LLiV16cUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqb256mem_mask, "vV16c*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqd128_mask, "V4iV2LLiV4iUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqd128mem_mask, "vV4i*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqd256mem_mask, "vV4i*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2LLiV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc", "nV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc", "nV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2LLiV8LLiIiV2LLiUc", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4LLiV8LLiIiV4LLiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2LLiV4LLiIiV2LLiUc", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_insertf32x8, "V16fV16fV8fIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_insertf64x2_512, "V8dV8dV2dIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_inserti32x8, "V16iV16iV8iIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_inserti64x2_512, "V8LLiV8LLiV2LLiIi", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_insertf64x4, "V8dV8dV4dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_inserti64x4, "V8LLiV8LLiV4LLiIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_insertf64x2_256, "V4dV4dV2dIi", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_inserti64x2_256, "V4LLiV4LLiV2LLiIi", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_insertf32x4_256, "V8fV8fV4fIi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_inserti32x4_256, "V8iV8iV4iIi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_insertf32x4, "V16fV16fV4fIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_inserti32x4, "V16iV16iV4iIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getmantpd128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantpd256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantps128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantps256_mask, "V8fV8fIiV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_getmantpd512_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getmantps512_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getexppd512_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_getexpps512_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddss3_maskz, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_maskz, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vfnmsubss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permdf512, "V8dV8dIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permdi512, "V8LLiV8LLiIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvarhi512, "V32sV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvarsf512, "V16fV16fV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvarsi512, "V16iV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_permvarqi512, "V64cV64cV64c", "ncV:512:", "avx512vbmi") +TARGET_BUILTIN(__builtin_ia32_permvarqi128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvarqi256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvarhi128, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvarhi256, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvardf256, "V4dV4dV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_permvardi256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc", "ncV:128:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc", "ncV:128:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_fpclassps256_mask, "UcV8fIiUc", "ncV:256:", "avx512dq,avx512vl") +TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", "avx512dq") +TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq") TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f") @@ -1744,73 +1744,73 @@ TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f") TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_dbpsadbw512, "V32sV64cV64cIi", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8LLiV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8LLiV8LLiV8LLiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cULLi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_pshufd512, "V16iV16iIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cULLi", "nc", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8LLiV8LLiC*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi", "n", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cULLi", "n", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi", "n", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cULLi", "n", "avx512vbmi2") -TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs", "n", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtph2ps_mask, "V4fV8sV4fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256_mask, "V8fV8sV8fUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vcvtps2ph_mask, "V8sV4fIiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256_mask, "V8sV8fIiV8sUc", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtw2mask512, "UiV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_cvtw2mask128, "UcV8s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtw2mask256, "UsV16s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512vbmi") -TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs", "nc", "avx512vbmi,avx512vl") -TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256_mask, "V32cV32cV32cV32cUi", "nc", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_dbpsadbw512, "V32sV64cV64cIi", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8LLiV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8LLiV8LLiV8LLiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cULLi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_pshufd512, "V16iV16iIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cULLi", "ncV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8LLiV8LLiC*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cULLi", "nV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi", "nV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cULLi", "nV:512:", "avx512vbmi2") +TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs", "nV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtph2ps_mask, "V4fV8sV4fUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256_mask, "V8fV8sV8fUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtps2ph_mask, "V8sV4fIiV8sUc", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256_mask, "V8sV8fIiV8sUc", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtw2mask512, "UiV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_cvtw2mask128, "UcV8s", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtw2mask256, "UsV16s", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512vbmi") +TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs", "ncV:128:", "avx512vbmi,avx512vl") +TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256_mask, "V32cV32cV32cV32cUi", "ncV:256:", "avx512vbmi,avx512vl") // generic select intrinsics -TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_selectb_512, "V64cULLiV64cV64c", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_selectw_128, "V8sUcV8sV8s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_selectw_256, "V16sUsV16sV16s", "nc", "avx512bw,avx512vl") -TARGET_BUILTIN(__builtin_ia32_selectw_512, "V32sUiV32sV32s", "nc", "avx512bw") -TARGET_BUILTIN(__builtin_ia32_selectd_128, "V4iUcV4iV4i", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_selectd_256, "V8iUcV8iV8i", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_selectd_512, "V16iUsV16iV16i", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_selectq_128, "V2LLiUcV2LLiV2LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_selectq_256, "V4LLiUcV4LLiV4LLi", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_selectq_512, "V8LLiUcV8LLiV8LLi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_selectps_128, "V4fUcV4fV4f", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_selectps_256, "V8fUcV8fV8f", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_selectps_512, "V16fUsV16fV16f", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_selectpd_128, "V2dUcV2dV2d", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_selectpd_256, "V4dUcV4dV4d", "nc", "avx512vl") -TARGET_BUILTIN(__builtin_ia32_selectpd_512, "V8dUcV8dV8d", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_selectb_512, "V64cULLiV64cV64c", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_selectw_128, "V8sUcV8sV8s", "ncV:128:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_selectw_256, "V16sUsV16sV16s", "ncV:256:", "avx512bw,avx512vl") +TARGET_BUILTIN(__builtin_ia32_selectw_512, "V32sUiV32sV32s", "ncV:512:", "avx512bw") +TARGET_BUILTIN(__builtin_ia32_selectd_128, "V4iUcV4iV4i", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_selectd_256, "V8iUcV8iV8i", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_selectd_512, "V16iUsV16iV16i", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_selectq_128, "V2LLiUcV2LLiV2LLi", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_selectq_256, "V4LLiUcV4LLiV4LLi", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_selectq_512, "V8LLiUcV8LLiV8LLi", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_selectps_128, "V4fUcV4fV4f", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_selectps_256, "V8fUcV8fV8f", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_selectps_512, "V16fUsV16fV16f", "ncV:512:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_selectpd_128, "V2dUcV2dV2d", "ncV:128:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_selectpd_256, "V4dUcV4dV4d", "ncV:256:", "avx512vl") +TARGET_BUILTIN(__builtin_ia32_selectpd_512, "V8dUcV8dV8d", "ncV:512:", "avx512f") // MONITORX/MWAITX TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "n", "mwaitx") Index: include/clang/Basic/BuiltinsX86_64.def =================================================================== --- include/clang/Basic/BuiltinsX86_64.def +++ include/clang/Basic/BuiltinsX86_64.def @@ -44,16 +44,16 @@ TARGET_BUILTIN(__builtin_ia32_readeflags_u64, "ULLi", "n", "") TARGET_BUILTIN(__builtin_ia32_writeeflags_u64, "vULLi", "n", "") -TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "nc", "sse") -TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "nc", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "ncV:128:", "sse") +TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "ncV:128:", "sse2") TARGET_BUILTIN(__builtin_ia32_movnti64, "vLLi*LLi", "n", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "LLiV2LLiIi", "nc", "sse2") -TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2LLiV2LLiLLiIi", "nc", "sse4.1") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "LLiV2LLiIi", "ncV:128:", "sse2") +TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2LLiV2LLiLLiIi", "ncV:128:", "sse4.1") TARGET_BUILTIN(__builtin_ia32_crc32di, "ULLiULLiULLi", "nc", "sse4.2") -TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "LLiV4LLiIi", "nc", "avx") -TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4LLiV4LLiLLiIi", "nc", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "LLiV4LLiIi", "ncV:256:", "avx") +TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4LLiV4LLiLLiIi", "ncV:256:", "avx") TARGET_BUILTIN(__builtin_ia32_rdfsbase32, "Ui", "n", "fsgsbase") TARGET_BUILTIN(__builtin_ia32_rdfsbase64, "ULLi", "n", "fsgsbase") TARGET_BUILTIN(__builtin_ia32_rdgsbase32, "Ui", "n", "fsgsbase") @@ -86,18 +86,18 @@ TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "nc", "tbm") TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiUi", "n", "lwp") TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiUi", "n", "lwp") -TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvtss2usi64, "ULLiV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttsd2si64, "LLiV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi64, "ULLiV2dIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttss2si64, "LLiV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_vcvttss2usi64, "ULLiV4fIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dLLiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fLLiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dULLiIi", "nc", "avx512f") -TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fULLiIi", "nc", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvtss2usi64, "ULLiV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttsd2si64, "LLiV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi64, "ULLiV2dIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttss2si64, "LLiV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_vcvttss2usi64, "ULLiV4fIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dLLiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fLLiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dULLiIi", "ncV:128:", "avx512f") +TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fULLiIi", "ncV:128:", "avx512f") TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri") TARGET_BUILTIN(__builtin_ia32_ptwrite64, "vULLi", "n", "ptwrite") Index: lib/Basic/Builtins.cpp =================================================================== --- lib/Basic/Builtins.cpp +++ lib/Basic/Builtins.cpp @@ -107,6 +107,21 @@ Table.get(getRecord(ID).Name).setBuiltinID(0); } +unsigned Builtin::Context::getRequiredVectorWidth(unsigned ID) const { + const char *WidthPos = ::strchr(getRecord(ID).Attributes, 'V'); + if (!WidthPos) + return 0; + + ++WidthPos; + assert(*WidthPos == ':' && "Vector width specifier must be followed by a ':'"); + ++WidthPos; + + + assert(::strchr(WidthPos, ':') && + "Vector width specifier must be end with a ':'"); + return ::strtol(WidthPos, nullptr, 10); +} + bool Builtin::Context::isLike(unsigned ID, unsigned &FormatIdx, bool &HasVAListArg, const char *Fmt) const { assert(Fmt && "Not passed a format string"); Index: lib/CodeGen/CGBuiltin.cpp =================================================================== --- lib/CodeGen/CGBuiltin.cpp +++ lib/CodeGen/CGBuiltin.cpp @@ -3654,6 +3654,9 @@ // can move this up to the beginning of the function. checkTargetFeatures(E, FD); + if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID)) + LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth); + // See if we have a target specific intrinsic. const char *Name = getContext().BuiltinInfo.getName(BuiltinID); Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic; Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h +++ lib/CodeGen/CodeGenFunction.h @@ -1463,6 +1463,10 @@ /// Terminate funclets keyed by parent funclet pad. llvm::MapVector TerminateFunclets; + /// Largest vector with used in ths function. Will be used to create a + /// function attribute. + unsigned LargestVectorWidth; + /// True if we need emit the life-time markers. const bool ShouldEmitLifetimeMarkers; Index: lib/CodeGen/CodeGenFunction.cpp =================================================================== --- lib/CodeGen/CodeGenFunction.cpp +++ lib/CodeGen/CodeGenFunction.cpp @@ -81,7 +81,7 @@ CXXStructorImplicitParamDecl(nullptr), CXXStructorImplicitParamValue(nullptr), OutermostConditional(nullptr), CurLexicalScope(nullptr), TerminateLandingPad(nullptr), - TerminateHandler(nullptr), TrapBB(nullptr), + TerminateHandler(nullptr), TrapBB(nullptr), LargestVectorWidth(0), ShouldEmitLifetimeMarkers( shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), CGM.getLangOpts())) { if (!suppressNewContext) @@ -445,6 +445,11 @@ cast(NormalCleanupDest.getPointer()), DT); NormalCleanupDest = Address::invalid(); } + + // Add the required-vector-width attribute + if (LargestVectorWidth != 0) + CurFn->addFnAttr("min-legal-vector-width", + llvm::utostr(LargestVectorWidth)); } /// ShouldInstrumentFunction - Return true if the current function should be @@ -1186,6 +1191,12 @@ // Emit a location at the end of the prologue. if (CGDebugInfo *DI = getDebugInfo()) DI->EmitLocation(Builder, StartLoc); + + // TODO: Do we need to handle this in two places like we do with + // target-features/target-cpu? + if (CurFuncDecl) + if (auto *VecWidth = CurFuncDecl->getAttr()) + LargestVectorWidth = VecWidth->getVectorWidth(); } void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args, Index: lib/Headers/__wmmintrin_aes.h =================================================================== --- lib/Headers/__wmmintrin_aes.h +++ lib/Headers/__wmmintrin_aes.h @@ -29,7 +29,7 @@ #define __WMMINTRIN_AES_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"), __min_vector_width__(128))) /// Performs a single round of AES encryption using the Equivalent /// Inverse Cipher, transforming the state value from the first source Index: lib/Headers/ammintrin.h =================================================================== --- lib/Headers/ammintrin.h +++ lib/Headers/ammintrin.h @@ -27,7 +27,7 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128))) /// Extracts the specified bits from the lower 64 bits of the 128-bit /// integer vector operand at the index \a idx and of the length \a len. Index: lib/Headers/avx2intrin.h =================================================================== --- lib/Headers/avx2intrin.h +++ lib/Headers/avx2intrin.h @@ -29,98 +29,99 @@ #define __AVX2INTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx2"))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(128))) /* SSE4 Multiple Packed Sums of Absolute Difference. */ #define _mm256_mpsadbw_epu8(X, Y, M) \ (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \ (__v32qi)(__m256i)(Y), (int)(M)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi8(__m256i __a) { return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi16(__m256i __a) { return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi32(__m256i __a) { return (__m256i)__builtin_ia32_pabsd256((__v8si)__a); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packs_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_packus_epi32(__m256i __V1, __m256i __V2) { return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a + (__v32qu)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a + (__v16hu)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a + (__v8su)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_add_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a + (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_adds_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b); @@ -130,19 +131,19 @@ (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \ (__v32qi)(__m256i)(b), (n)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_and_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a & (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_andnot_si256(__m256i __a, __m256i __b) { return (__m256i)(~(__v4du)__a & (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu8(__m256i __a, __m256i __b) { typedef unsigned short __v32hu __attribute__((__vector_size__(64))); @@ -152,7 +153,7 @@ >> 1, __v32qu); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_avg_epu16(__m256i __a, __m256i __b) { typedef unsigned int __v16su __attribute__((__vector_size__(64))); @@ -162,7 +163,7 @@ >> 1, __v16hu); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M) { return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2, @@ -173,31 +174,31 @@ (__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \ (__v16hi)(__m256i)(V2), (int)(M)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qi)__a == (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a == (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a == (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpeq_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4di)__a == (__v4di)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi8(__m256i __a, __m256i __b) { /* This function always performs a signed comparison, but __v32qi is a char @@ -205,151 +206,151 @@ return (__m256i)((__v32qs)__a > (__v32qs)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hi)__a > (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8si)__a > (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmpgt_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4di)__a > (__v4di)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadd_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadd_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hadds_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hsub_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hsub_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_hsubs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maddubs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS256 _mm256_movemask_epi8(__m256i __a) { return __builtin_ia32_pmovmskb256((__v32qi)__a); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi16(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char @@ -357,7 +358,7 @@ return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi32(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char @@ -365,7 +366,7 @@ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi8_epi64(__m128i __V) { /* This function always performs a signed extension, but __v16qi is a char @@ -373,115 +374,115 @@ return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi32(__m128i __V) { return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi64(__m128i __V) { return (__m256i)__builtin_convertvector((__v4si)__V, __v4di); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi16(__m128i __V) { return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi32(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu8_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi32(__m128i __V) { return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu16_epi64(__m128i __V) { return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_epi64(__m128i __V) { return (__m256i)__builtin_convertvector((__v4su)__V, __v4di); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mul_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhrs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhi_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mulhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a * (__v16hu)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi32 (__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a * (__v8su)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mul_epu32(__m256i __a, __m256i __b) { return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_or_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a | (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sad_epu8(__m256i __a, __m256i __b) { return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shuffle_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b); @@ -496,19 +497,19 @@ #define _mm256_shufflelo_epi16(a, imm) \ (__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sign_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b); @@ -520,61 +521,61 @@ #define _mm256_bslli_epi128(a, imm) \ (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_slli_epi64(__m256i __a, int __count) { return __builtin_ia32_psllqi256((__v4di)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sll_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psllq256((__v4di)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count); @@ -586,176 +587,176 @@ #define _mm256_bsrli_epi128(a, imm) \ (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi16(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi16(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi32(__m256i __a, int __count) { return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi32(__m256i __a, __m128i __count) { return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srli_epi64(__m256i __a, int __count) { return __builtin_ia32_psrlqi256((__v4di)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srl_epi64(__m256i __a, __m128i __count) { return __builtin_ia32_psrlq256((__v4di)__a, __count); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi8(__m256i __a, __m256i __b) { return (__m256i)((__v32qu)__a - (__v32qu)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi16(__m256i __a, __m256i __b) { return (__m256i)((__v16hu)__a - (__v16hu)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi32(__m256i __a, __m256i __b) { return (__m256i)((__v8su)__a - (__v8su)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sub_epi64(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a - (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu8(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_subs_epu16(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpackhi_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi8(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi16(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_unpacklo_epi64(__m256i __a, __m256i __b) { return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_xor_si256(__m256i __a, __m256i __b) { return (__m256i)((__v4du)__a ^ (__v4du)__b); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_stream_load_si256(__m256i const *__V) { typedef __v4di __v4di_aligned __attribute__((aligned(32))); return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_broadcastss_ps(__m128 __X) { return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_broadcastsd_pd(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcastss_ps(__m128 __X) { return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcastsd_pd(__m128d __X) { return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastsi128_si256(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1); @@ -769,56 +770,56 @@ (__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \ (__v8si)(__m256i)(V2), (int)(M)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastb_epi8(__m128i __X) { return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastw_epi16(__m128i __X) { return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastd_epi32(__m128i __X) { return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastq_epi64(__m128i __X) { return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastb_epi8(__m128i __X) { return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastw_epi16(__m128i __X) { return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastd_epi32(__m128i __X) { return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastq_epi64(__m128i __X) { return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b) { return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b); @@ -827,7 +828,7 @@ #define _mm256_permute4x64_pd(V, M) \ (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M)) -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_permutevar8x32_ps(__m256 __a, __m256i __b) { return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b); @@ -846,109 +847,109 @@ (__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \ (__v2di)(__m128i)(V2), (int)(M)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskload_epi32(int const *__X, __m256i __M) { return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskload_epi64(long long const *__X, __m256i __M) { return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskload_epi32(int const *__X, __m128i __M) { return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskload_epi64(long long const *__X, __m128i __M) { return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y) { __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y) { __builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y) { __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y) { __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi32(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi32(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y); @@ -1161,6 +1162,7 @@ (__v4di)(__m256i)(i), \ (__v4di)_mm256_set1_epi64x(-1), (s)) -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS256 +#undef __DEFAULT_FN_ATTRS128 #endif /* __AVX2INTRIN_H */ Index: lib/Headers/avx512bitalgintrin.h =================================================================== --- lib/Headers/avx512bitalgintrin.h +++ lib/Headers/avx512bitalgintrin.h @@ -29,7 +29,7 @@ #define __AVX512BITALGINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi16(__m512i __A) Index: lib/Headers/avx512bwintrin.h =================================================================== --- lib/Headers/avx512bwintrin.h +++ lib/Headers/avx512bwintrin.h @@ -32,7 +32,7 @@ typedef unsigned long long __mmask64; /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512))) /* Integer compare */ Index: lib/Headers/avx512cdintrin.h =================================================================== --- lib/Headers/avx512cdintrin.h +++ lib/Headers/avx512cdintrin.h @@ -29,7 +29,7 @@ #define __AVX512CDINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_conflict_epi64 (__m512i __A) Index: lib/Headers/avx512dqintrin.h =================================================================== --- lib/Headers/avx512dqintrin.h +++ lib/Headers/avx512dqintrin.h @@ -29,7 +29,7 @@ #define __AVX512DQINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_mullo_epi64 (__m512i __A, __m512i __B) { Index: lib/Headers/avx512fintrin.h =================================================================== --- lib/Headers/avx512fintrin.h +++ lib/Headers/avx512fintrin.h @@ -173,11 +173,12 @@ } _MM_MANTISSA_SIGN_ENUM; /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f"))) +#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128))) /* Create vectors with repeated elements */ -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_setzero_si512(void) { return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 }; @@ -185,38 +186,38 @@ #define _mm512_setzero_epi32 _mm512_setzero_si512 -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_undefined_pd(void) { return (__m512d)__builtin_ia32_undef512(); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined(void) { return (__m512)__builtin_ia32_undef512(); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_undefined_ps(void) { return (__m512)__builtin_ia32_undef512(); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_undefined_epi32(void) { return (__m512i)__builtin_ia32_undef512(); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastd_epi32 (__m128i __A) { return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, @@ -224,7 +225,7 @@ (__v16si) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512(__M, @@ -232,14 +233,14 @@ (__v16si) _mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcastq_epi64 (__m128i __A) { return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512(__M, @@ -248,7 +249,7 @@ } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512(__M, @@ -257,7 +258,7 @@ } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_setzero_ps(void) { return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, @@ -266,26 +267,26 @@ #define _mm512_setzero _mm512_setzero_ps -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_setzero_pd(void) { return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 }; } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set1_ps(float __w) { return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set1_pd(double __w) { return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi8(char __w) { return __extension__ (__m512i)(__v64qi){ @@ -299,7 +300,7 @@ __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi16(short __w) { return __extension__ (__m512i)(__v32hi){ @@ -309,7 +310,7 @@ __w, __w, __w, __w, __w, __w, __w, __w }; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi32(int __s) { return __extension__ (__m512i)(__v16si){ @@ -317,7 +318,7 @@ __s, __s, __s, __s, __s, __s, __s, __s }; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi32(__mmask16 __M, int __A) { return (__m512i)__builtin_ia32_selectd_512(__M, @@ -325,13 +326,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set1_epi64(long long __d) { return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d }; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A) { return (__m512i)__builtin_ia32_selectq_512(__M, @@ -339,14 +340,14 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcastss_ps(__m128 __A) { return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi32 (int __A, int __B, int __C, int __D) { return __extension__ (__m512i)(__v16si) @@ -354,7 +355,7 @@ __D, __C, __B, __A, __D, __C, __B, __A }; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set4_epi64 (long long __A, long long __B, long long __C, long long __D) { @@ -362,14 +363,14 @@ { __D, __C, __B, __A, __D, __C, __B, __A }; } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_set4_pd (double __A, double __B, double __C, double __D) { return __extension__ (__m512d) { __D, __C, __B, __A, __D, __C, __B, __A }; } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_set4_ps (float __A, float __B, float __C, float __D) { return __extension__ (__m512) @@ -389,7 +390,7 @@ #define _mm512_setr4_ps(e0,e1,e2,e3) \ _mm512_set4_ps((e3),(e2),(e1),(e0)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcastsd_pd(__m128d __A) { return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A, @@ -398,122 +399,122 @@ /* Cast between vector types */ -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd256_pd512(__m256d __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castps256_ps512(__m256 __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1); } -static __inline __m128d __DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd128(__m512d __a) { return __builtin_shufflevector(__a, __a, 0, 1); } -static __inline __m256d __DEFAULT_FN_ATTRS +static __inline __m256d __DEFAULT_FN_ATTRS512 _mm512_castpd512_pd256 (__m512d __A) { return __builtin_shufflevector(__A, __A, 0, 1, 2, 3); } -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps128(__m512 __a) { return __builtin_shufflevector(__a, __a, 0, 1, 2, 3); } -static __inline __m256 __DEFAULT_FN_ATTRS +static __inline __m256 __DEFAULT_FN_ATTRS512 _mm512_castps512_ps256 (__m512 __A) { return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castpd_ps (__m512d __A) { return (__m512) (__A); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castpd_si512 (__m512d __A) { return (__m512i) (__A); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_castpd128_pd512 (__m128d __A) { return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castps_pd (__m512 __A) { return (__m512d) (__A); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_castps_si512 (__m512 __A) { return (__m512i) (__A); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_castps128_ps512 (__m128 __A) { return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi128_si512 (__m128i __A) { return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_castsi256_si512 (__m256i __A) { return __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_castsi512_ps (__m512i __A) { return (__m512) (__A); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_castsi512_pd (__m512i __A) { return (__m512d) (__A); } -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si128 (__m512i __A) { return (__m128i)__builtin_shufflevector(__A, __A , 0, 1); } -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_castsi512_si256 (__m512i __A) { return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_int2mask(int __a) { return (__mmask16)__a; } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask2int(__mmask16 __a) { return (int)__a; @@ -532,7 +533,7 @@ /// A 128-bit vector of [2 x double]. /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits /// contain the value of the parameter. The upper 384 bits are set to zero. -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd128_pd512(__m128d __a) { return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3); @@ -551,7 +552,7 @@ /// A 256-bit vector of [4 x double]. /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits /// contain the value of the parameter. The upper 256 bits are set to zero. -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_zextpd256_pd512(__m256d __a) { return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7); @@ -569,7 +570,7 @@ /// A 128-bit vector of [4 x float]. /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits /// contain the value of the parameter. The upper 384 bits are set to zero. -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps128_ps512(__m128 __a) { return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7); @@ -587,7 +588,7 @@ /// A 256-bit vector of [8 x float]. /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits /// contain the value of the parameter. The upper 256 bits are set to zero. -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_zextps256_ps512(__m256 __a) { return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); @@ -605,7 +606,7 @@ /// A 128-bit integer vector. /// \returns A 512-bit integer vector. The lower 128 bits contain the value of /// the parameter. The upper 384 bits are set to zero. -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi128_si512(__m128i __a) { return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3); @@ -623,20 +624,20 @@ /// A 256-bit integer vector. /// \returns A 512-bit integer vector. The lower 256 bits contain the value of /// the parameter. The upper 256 bits are set to zero. -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_zextsi256_si512(__m256i __a) { return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7); } /* Bitwise operators */ -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a & (__v16su)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, @@ -644,20 +645,20 @@ (__v16si) __src); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b) { return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a & (__v8du)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k, @@ -665,26 +666,26 @@ (__v8di) __src); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b) { return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_si512 (__m512i __A, __m512i __B) { return (__m512i)(~(__v8du)__A & (__v8du)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi32 (__m512i __A, __m512i __B) { return (__m512i)(~(__v16su)__A & (__v16su)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -692,20 +693,20 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(), __U, __A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_andnot_epi64(__m512i __A, __m512i __B) { return (__m512i)(~(__v8du)__A & (__v8du)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -713,20 +714,20 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(), __U, __A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a | (__v16su)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, @@ -734,19 +735,19 @@ (__v16si)__src); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b) { return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a | (__v8du)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k, @@ -754,19 +755,19 @@ (__v8di)__src); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b) { return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi32(__m512i __a, __m512i __b) { return (__m512i)((__v16su)__a ^ (__v16su)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k, @@ -774,19 +775,19 @@ (__v16si)__src); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b) { return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_epi64(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a ^ (__v8du)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k, @@ -794,25 +795,25 @@ (__v8di)__src); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b) { return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_and_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a & (__v8du)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_or_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a | (__v8du)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_xor_si512(__m512i __a, __m512i __b) { return (__m512i)((__v8du)__a ^ (__v8du)__b); @@ -820,49 +821,49 @@ /* Arithmetic */ -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_add_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a + (__v8df)__b); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_add_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a + (__v16sf)__b); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mul_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a * (__v8df)__b); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mul_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a * (__v16sf)__b); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_sub_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a - (__v8df)__b); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_sub_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a - (__v16sf)__b); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A + (__v8du) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -870,7 +871,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -878,13 +879,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A - (__v8du) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -892,7 +893,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -900,13 +901,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_add_epi32 (__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A + (__v16su) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -914,7 +915,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -922,13 +923,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sub_epi32 (__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A - (__v16su) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -936,7 +937,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -958,14 +959,14 @@ (__v8df)_mm512_max_round_pd((A), (B), (R)), \ (__v8df)_mm512_setzero_pd()) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_max_pd(__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, @@ -973,7 +974,7 @@ (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, @@ -995,14 +996,14 @@ (__v16sf)_mm512_max_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps()) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_max_ps(__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, @@ -1010,7 +1011,7 @@ (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, @@ -1018,7 +1019,7 @@ (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -1027,7 +1028,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -1054,7 +1055,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -1063,7 +1064,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -1091,13 +1092,13 @@ (__mmask8)(U), (int)(R)) static __inline __m512i -__DEFAULT_FN_ATTRS +__DEFAULT_FN_ATTRS512 _mm512_max_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -1105,7 +1106,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -1113,13 +1114,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu32(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -1127,7 +1128,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -1135,13 +1136,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epi64(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1149,7 +1150,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1157,13 +1158,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_max_epu64(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1171,7 +1172,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1193,14 +1194,14 @@ (__v8df)_mm512_min_round_pd((A), (B), (R)), \ (__v8df)_mm512_setzero_pd()) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_min_pd(__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, @@ -1208,7 +1209,7 @@ (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, @@ -1230,14 +1231,14 @@ (__v16sf)_mm512_min_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps()) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_min_ps(__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, @@ -1245,7 +1246,7 @@ (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, @@ -1253,7 +1254,7 @@ (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -1262,7 +1263,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -1289,7 +1290,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -1298,7 +1299,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -1326,13 +1327,13 @@ (__mmask8)(U), (int)(R)) static __inline __m512i -__DEFAULT_FN_ATTRS +__DEFAULT_FN_ATTRS512 _mm512_min_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -1340,7 +1341,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -1348,13 +1349,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu32(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -1362,7 +1363,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -1370,13 +1371,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epi64(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1384,7 +1385,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1392,13 +1393,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_min_epu64(__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1406,7 +1407,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1414,13 +1415,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1428,7 +1429,7 @@ (__v8di)__W); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1436,13 +1437,13 @@ (__v8di)_mm512_setzero_si512 ()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mul_epu32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1450,7 +1451,7 @@ (__v8di)__W); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -1458,13 +1459,13 @@ (__v8di)_mm512_setzero_si512 ()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mullo_epi32 (__m512i __A, __m512i __B) { return (__m512i) ((__v16su) __A * (__v16su) __B); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -1472,7 +1473,7 @@ (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -1480,12 +1481,12 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mullox_epi64 (__m512i __A, __m512i __B) { return (__m512i) ((__v8du) __A * (__v8du) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, (__v8di)_mm512_mullox_epi64(__A, __B), @@ -1505,14 +1506,14 @@ (__v8df)_mm512_sqrt_round_pd((A), (R)), \ (__v8df)_mm512_setzero_pd()) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_sqrt_pd(__m512d __A) { return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512(__U, @@ -1520,7 +1521,7 @@ (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512(__U, @@ -1541,14 +1542,14 @@ (__v16sf)_mm512_sqrt_round_ps((A), (R)), \ (__v16sf)_mm512_setzero_ps()) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_sqrt_ps(__m512 __A) { return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512(__U, @@ -1556,7 +1557,7 @@ (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512(__U, @@ -1564,7 +1565,7 @@ (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_pd(__m512d __A) { return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, @@ -1572,7 +1573,7 @@ _mm512_setzero_pd (), (__mmask8) -1);} -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, @@ -1580,7 +1581,7 @@ (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A, @@ -1589,7 +1590,7 @@ (__mmask8) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rsqrt14_ps(__m512 __A) { return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, @@ -1598,7 +1599,7 @@ (__mmask16) -1); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, @@ -1606,7 +1607,7 @@ (__mmask16) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A, @@ -1615,7 +1616,7 @@ (__mmask16) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ss(__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, @@ -1625,7 +1626,7 @@ (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, @@ -1634,7 +1635,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A, @@ -1643,7 +1644,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_sd(__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A, @@ -1653,7 +1654,7 @@ (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, @@ -1662,7 +1663,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A, @@ -1671,7 +1672,7 @@ (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_rcp14_pd(__m512d __A) { return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, @@ -1680,7 +1681,7 @@ (__mmask8) -1); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, @@ -1688,7 +1689,7 @@ (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A, @@ -1697,7 +1698,7 @@ (__mmask8) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_rcp14_ps(__m512 __A) { return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, @@ -1706,7 +1707,7 @@ (__mmask16) -1); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, @@ -1714,7 +1715,7 @@ (__mmask16) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A, @@ -1723,7 +1724,7 @@ (__mmask16) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ss(__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, @@ -1733,7 +1734,7 @@ (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, @@ -1742,7 +1743,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A, @@ -1751,7 +1752,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_sd(__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A, @@ -1761,7 +1762,7 @@ (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, @@ -1770,7 +1771,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A, @@ -1779,7 +1780,7 @@ (__mmask8) __U); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_floor_ps(__m512 __A) { return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, @@ -1788,7 +1789,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, @@ -1797,7 +1798,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_floor_pd(__m512d __A) { return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, @@ -1806,7 +1807,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, @@ -1815,7 +1816,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, @@ -1824,7 +1825,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_ceil_ps(__m512 __A) { return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A, @@ -1833,7 +1834,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_ceil_pd(__m512d __A) { return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, @@ -1842,7 +1843,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A, @@ -1851,13 +1852,13 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi64(__m512i __A) { return (__m512i)__builtin_ia32_pabsq512((__v8di)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -1865,7 +1866,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -1873,13 +1874,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_abs_epi32(__m512i __A) { return (__m512i)__builtin_ia32_pabsd512((__v16si) __A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -1887,7 +1888,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -1895,14 +1896,14 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { __A = _mm_add_ss(__A, __B); __A[0] = (__U & 1) ? __A[0] : __W[0]; return __A; } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) { __A = _mm_add_ss(__A, __B); __A[0] = (__U & 1) ? __A[0] : 0; @@ -1927,14 +1928,14 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_add_sd(__A, __B); __A[0] = (__U & 1) ? __A[0] : __W[0]; return __A; } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_add_sd(__A, __B); __A[0] = (__U & 1) ? __A[0] : 0; @@ -1958,28 +1959,28 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_add_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_add_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_add_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_add_ps(__A, __B), @@ -2014,14 +2015,14 @@ (__v16sf)_mm512_add_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps()); -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { __A = _mm_sub_ss(__A, __B); __A[0] = (__U & 1) ? __A[0] : __W[0]; return __A; } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) { __A = _mm_sub_ss(__A, __B); __A[0] = (__U & 1) ? __A[0] : 0; @@ -2045,14 +2046,14 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_sub_sd(__A, __B); __A[0] = (__U & 1) ? __A[0] : __W[0]; return __A; } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_sub_sd(__A, __B); __A[0] = (__U & 1) ? __A[0] : 0; @@ -2077,28 +2078,28 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_sub_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_sub_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_sub_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_sub_ps(__A, __B), @@ -2133,14 +2134,14 @@ (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps()); -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { __A = _mm_mul_ss(__A, __B); __A[0] = (__U & 1) ? __A[0] : __W[0]; return __A; } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) { __A = _mm_mul_ss(__A, __B); __A[0] = (__U & 1) ? __A[0] : 0; @@ -2164,14 +2165,14 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_mul_sd(__A, __B); __A[0] = (__U & 1) ? __A[0] : __W[0]; return __A; } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) { __A = _mm_mul_sd(__A, __B); __A[0] = (__U & 1) ? __A[0] : 0; @@ -2196,28 +2197,28 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_mul_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_mul_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_mul_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_mul_ps(__A, __B), @@ -2252,7 +2253,7 @@ (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \ (__v16sf)_mm512_setzero_ps()); -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -2261,7 +2262,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A, (__v4sf) __B, @@ -2288,7 +2289,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -2297,7 +2298,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A, (__v2df) __B, @@ -2324,40 +2325,40 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R)) -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_div_pd(__m512d __a, __m512d __b) { return (__m512d)((__v8df)__a/(__v8df)__b); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_div_pd(__A, __B), (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, (__v8df)_mm512_div_pd(__A, __B), (__v8df)_mm512_setzero_pd()); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_div_ps(__m512 __a, __m512 __b) { return (__m512)((__v16sf)__a/(__v16sf)__b); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_div_ps(__A, __B), (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, (__v16sf)_mm512_div_ps(__A, __B), @@ -2540,7 +2541,7 @@ (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, @@ -2550,7 +2551,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, @@ -2560,7 +2561,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A, @@ -2570,7 +2571,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, @@ -2580,7 +2581,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, @@ -2590,7 +2591,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, @@ -2600,7 +2601,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A, @@ -2610,7 +2611,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, @@ -2620,7 +2621,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A, @@ -2630,7 +2631,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, @@ -2640,7 +2641,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, @@ -2650,7 +2651,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A, @@ -2744,7 +2745,7 @@ (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, @@ -2754,7 +2755,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, @@ -2764,7 +2765,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A, @@ -2774,7 +2775,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, @@ -2784,7 +2785,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, @@ -2794,7 +2795,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, @@ -2804,7 +2805,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A, @@ -2814,7 +2815,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, @@ -2824,7 +2825,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A, @@ -2834,7 +2835,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, @@ -2844,7 +2845,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, @@ -2854,7 +2855,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A, @@ -2913,7 +2914,7 @@ (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, @@ -2923,7 +2924,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, @@ -2933,7 +2934,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A, @@ -2943,7 +2944,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, @@ -2953,7 +2954,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, @@ -2963,7 +2964,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A, @@ -2973,7 +2974,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A, @@ -3032,7 +3033,7 @@ (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, @@ -3042,7 +3043,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, @@ -3052,7 +3053,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A, @@ -3062,7 +3063,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, @@ -3072,7 +3073,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, @@ -3082,7 +3083,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A, @@ -3092,7 +3093,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A, @@ -3109,7 +3110,7 @@ (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A, @@ -3125,7 +3126,7 @@ (__v16sf)(__m512)(C), \ (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A, @@ -3142,7 +3143,7 @@ (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A, @@ -3159,7 +3160,7 @@ (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A, @@ -3176,7 +3177,7 @@ (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, @@ -3193,7 +3194,7 @@ (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, @@ -3217,7 +3218,7 @@ (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C) { return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A, @@ -3227,7 +3228,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U) { return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A, @@ -3251,7 +3252,7 @@ (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C) { return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A, @@ -3261,7 +3262,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U) { return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A, @@ -3275,14 +3276,14 @@ /* Vector permutations */ -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I, (__v16si) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I, __m512i __B) { @@ -3291,7 +3292,7 @@ (__v16si)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U, __m512i __B) { @@ -3300,7 +3301,7 @@ (__v16si)__I); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I, __m512i __B) { @@ -3309,14 +3310,14 @@ (__v16si)_mm512_setzero_si512()); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B) { return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I, (__v8di) __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I, __m512i __B) { @@ -3325,7 +3326,7 @@ (__v8di)__A); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U, __m512i __B) { @@ -3334,7 +3335,7 @@ (__v8di)__I); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I, __m512i __B) { @@ -3404,7 +3405,7 @@ /* Vector Blend */ -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, @@ -3412,7 +3413,7 @@ (__v8df) __A); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, @@ -3420,7 +3421,7 @@ (__v16sf) __A); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, @@ -3428,7 +3429,7 @@ (__v8di) __A); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W) { return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, @@ -3566,7 +3567,7 @@ (__mmask16)(U), (int)(R)) -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epu32(__m512 __A) { return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, @@ -3576,7 +3577,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, @@ -3585,7 +3586,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A, @@ -3624,13 +3625,13 @@ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_ps (__m512i __A) { return (__m512)__builtin_convertvector((__v16su)__A, __v16sf); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -3638,7 +3639,7 @@ (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -3646,13 +3647,13 @@ (__v16sf)_mm512_setzero_ps()); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_pd(__m256i __A) { return (__m512d)__builtin_convertvector((__v8si)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -3660,7 +3661,7 @@ (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -3668,25 +3669,25 @@ (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepi32lo_pd(__m512i __A) { return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A)); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) { return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A)); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_ps (__m512i __A) { return (__m512)__builtin_convertvector((__v16si)__A, __v16sf); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -3694,7 +3695,7 @@ (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -3702,13 +3703,13 @@ (__v16sf)_mm512_setzero_ps()); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_pd(__m256i __A) { return (__m512d)__builtin_convertvector((__v8su)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -3716,7 +3717,7 @@ (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -3724,13 +3725,13 @@ (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtepu32lo_pd(__m512i __A) { return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A)); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A) { return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A)); @@ -3751,7 +3752,7 @@ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_ps (__m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, @@ -3760,7 +3761,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, @@ -3769,7 +3770,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A) { return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A, @@ -3778,7 +3779,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpd_pslo (__m512d __A) { return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A), @@ -3786,7 +3787,7 @@ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A) { return (__m512) __builtin_shufflevector ( @@ -3842,7 +3843,7 @@ (__mmask16)(U), (int)(R)) -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtph_ps(__m256i __A) { return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, @@ -3852,7 +3853,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A) { return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, @@ -3861,7 +3862,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A) { return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A, @@ -3885,7 +3886,7 @@ (__v8si)_mm256_setzero_si256(), \ (__mmask8)(U), (int)(R)) -static __inline __m256i __DEFAULT_FN_ATTRS +static __inline __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epi32(__m512d __a) { return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a, @@ -3894,7 +3895,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, @@ -3903,7 +3904,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A, @@ -3927,7 +3928,7 @@ (__v16si)_mm512_setzero_si512(), \ (__mmask16)(U), (int)(R)) -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_cvttps_epi32(__m512 __a) { return (__m512i) @@ -3936,7 +3937,7 @@ (__mmask16) -1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, @@ -3945,7 +3946,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A, @@ -3969,7 +3970,7 @@ (__v16si)_mm512_setzero_si512(), \ (__mmask16)(U), (int)(R)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epi32 (__m512 __A) { return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, @@ -3978,7 +3979,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, @@ -3987,7 +3988,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A, @@ -4012,7 +4013,7 @@ (__v8si)_mm256_setzero_si256(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epi32 (__m512d __A) { return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, @@ -4022,7 +4023,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, @@ -4031,7 +4032,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A, @@ -4056,7 +4057,7 @@ (__v16si)_mm512_setzero_si512(), \ (__mmask16)(U), (int)(R)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtps_epu32 ( __m512 __A) { return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\ @@ -4066,7 +4067,7 @@ _MM_FROUND_CUR_DIRECTION);\ } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, @@ -4075,7 +4076,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A) { return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A, @@ -4100,7 +4101,7 @@ (__v8si)_mm256_setzero_si256(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtpd_epu32 (__m512d __A) { return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, @@ -4110,7 +4111,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, @@ -4119,7 +4120,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A, @@ -4129,13 +4130,13 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ double __DEFAULT_FN_ATTRS +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_cvtsd_f64(__m512d __a) { return __a[0]; } -static __inline__ float __DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_cvtss_f32(__m512 __a) { return __a[0]; @@ -4143,14 +4144,14 @@ /* Unpack and Interleave */ -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpackhi_pd(__m512d __a, __m512d __b) { return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -4158,7 +4159,7 @@ (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -4166,14 +4167,14 @@ (__v8df)_mm512_setzero_pd()); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_unpacklo_pd(__m512d __a, __m512d __b) { return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -4181,7 +4182,7 @@ (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U, @@ -4189,7 +4190,7 @@ (__v8df)_mm512_setzero_pd()); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpackhi_ps(__m512 __a, __m512 __b) { return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, @@ -4199,7 +4200,7 @@ 2+12, 18+12, 3+12, 19+12); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, @@ -4207,7 +4208,7 @@ (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, @@ -4215,7 +4216,7 @@ (__v16sf)_mm512_setzero_ps()); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_unpacklo_ps(__m512 __a, __m512 __b) { return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b, @@ -4225,7 +4226,7 @@ 0+12, 16+12, 1+12, 17+12); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, @@ -4233,7 +4234,7 @@ (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512)__builtin_ia32_selectps_512((__mmask16) __U, @@ -4241,7 +4242,7 @@ (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, @@ -4251,7 +4252,7 @@ 2+12, 18+12, 3+12, 19+12); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, @@ -4259,7 +4260,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, @@ -4267,7 +4268,7 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi32(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B, @@ -4277,7 +4278,7 @@ 0+12, 16+12, 1+12, 17+12); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, @@ -4285,7 +4286,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U, @@ -4293,14 +4294,14 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpackhi_epi64(__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, @@ -4308,7 +4309,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, @@ -4316,14 +4317,14 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B) { return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B, 0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, @@ -4331,7 +4332,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U, @@ -4342,7 +4343,7 @@ /* SIMD load ops */ -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_loadu_si512 (void const *__P) { struct __loadu_si512 { @@ -4351,7 +4352,7 @@ return ((struct __loadu_si512*)__P)->__v; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P, @@ -4360,7 +4361,7 @@ } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P, @@ -4369,7 +4370,7 @@ (__mmask16) __U); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P, @@ -4377,7 +4378,7 @@ (__mmask8) __U); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P, @@ -4386,7 +4387,7 @@ (__mmask8) __U); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P, @@ -4394,7 +4395,7 @@ (__mmask16) __U); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P, @@ -4403,7 +4404,7 @@ (__mmask16) __U); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P, @@ -4411,7 +4412,7 @@ (__mmask8) __U); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P, @@ -4420,7 +4421,7 @@ (__mmask8) __U); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_loadu_pd(void const *__p) { struct __loadu_pd { @@ -4429,7 +4430,7 @@ return ((struct __loadu_pd*)__p)->__v; } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_loadu_ps(void const *__p) { struct __loadu_ps { @@ -4438,13 +4439,13 @@ return ((struct __loadu_ps*)__p)->__v; } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_load_ps(void const *__p) { return *(__m512*)__p; } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P, @@ -4452,7 +4453,7 @@ (__mmask16) __U); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_load_ps(__mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P, @@ -4461,13 +4462,13 @@ (__mmask16) __U); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_load_pd(void const *__p) { return *(__m512d*)__p; } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P, @@ -4475,7 +4476,7 @@ (__mmask8) __U); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_load_pd(__mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P, @@ -4484,19 +4485,19 @@ (__mmask8) __U); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_si512 (void const *__P) { return *(__m512i *) __P; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi32 (void const *__P) { return *(__m512i *) __P; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_load_epi64 (void const *__P) { return *(__m512i *) __P; @@ -4504,14 +4505,14 @@ /* SIMD store ops */ -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A) { __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A, (__mmask8) __U); } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_si512 (void *__P, __m512i __A) { struct __storeu_si512 { @@ -4520,20 +4521,20 @@ ((struct __storeu_si512*)__P)->__v = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A) { __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A, (__mmask16) __U); } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A) { __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U); } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_pd(void *__P, __m512d __A) { struct __storeu_pd { @@ -4542,14 +4543,14 @@ ((struct __storeu_pd*)__P)->__v = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A) { __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A, (__mmask16) __U); } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_storeu_ps(void *__P, __m512 __A) { struct __storeu_ps { @@ -4558,44 +4559,44 @@ ((struct __storeu_ps*)__P)->__v = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A) { __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U); } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_pd(void *__P, __m512d __A) { *(__m512d*)__P = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A) { __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A, (__mmask16) __U); } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_ps(void *__P, __m512 __A) { *(__m512*)__P = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_si512 (void *__P, __m512i __A) { *(__m512i *) __P = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi32 (void *__P, __m512i __A) { *(__m512i *) __P = __A; } -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS512 _mm512_store_epi64 (void *__P, __m512i __A) { *(__m512i *) __P = __A; @@ -4603,7 +4604,7 @@ /* Mask ops */ -static __inline __mmask16 __DEFAULT_FN_ATTRS +static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_knot(__mmask16 __M) { return __builtin_ia32_knothi(__M); @@ -4711,7 +4712,7 @@ #define _mm512_mask_cmpneq_epu64_mask(k, A, B) \ _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi32(__m128i __A) { /* This function always performs a signed extension, but __v16qi is a char @@ -4719,7 +4720,7 @@ return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -4727,7 +4728,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -4735,7 +4736,7 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi8_epi64(__m128i __A) { /* This function always performs a signed extension, but __v16qi is a char @@ -4743,7 +4744,7 @@ return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4751,7 +4752,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4759,13 +4760,13 @@ (__v8di)_mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi64(__m256i __X) { return (__m512i)__builtin_convertvector((__v8si)__X, __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4773,7 +4774,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4781,13 +4782,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi32(__m256i __A) { return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -4795,7 +4796,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -4803,13 +4804,13 @@ (__v16si)_mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepi16_epi64(__m128i __A) { return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4817,7 +4818,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4825,13 +4826,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi32(__m128i __A) { return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -4839,7 +4840,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -4847,13 +4848,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu8_epi64(__m128i __A) { return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4861,7 +4862,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4869,13 +4870,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu32_epi64(__m256i __X) { return (__m512i)__builtin_convertvector((__v8su)__X, __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4883,7 +4884,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4891,13 +4892,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi32(__m256i __A) { return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -4905,7 +4906,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -4913,13 +4914,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_cvtepu16_epi64(__m128i __A) { return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4927,7 +4928,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -4935,13 +4936,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi32 (__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -4949,7 +4950,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -4957,13 +4958,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rorv_epi64 (__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -4971,7 +4972,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -5047,13 +5048,13 @@ (__v8di)_mm512_rol_epi64((a), (b)), \ (__v8di)_mm512_setzero_si512()) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi32 (__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -5061,7 +5062,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectd_512(__U, @@ -5069,13 +5070,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_rolv_epi64 (__m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -5083,7 +5084,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B) { return (__m512i)__builtin_ia32_selectq_512(__U, @@ -5117,13 +5118,13 @@ (__v8di)_mm512_ror_epi64((A), (B)), \ (__v8di)_mm512_setzero_si512()) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi32(__m512i __A, int __B) { return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5131,20 +5132,20 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_slli_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_slli_epi64(__m512i __A, int __B) { return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5152,7 +5153,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5160,13 +5161,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi32(__m512i __A, int __B) { return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5174,20 +5175,20 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, (__v16si)_mm512_srli_epi32(__A, __B), (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srli_epi64(__m512i __A, int __B) { return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5195,7 +5196,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5203,7 +5204,7 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, @@ -5211,7 +5212,7 @@ (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P, @@ -5220,14 +5221,14 @@ (__mmask16) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A) { __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A, (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, @@ -5235,7 +5236,7 @@ (__v16si) __W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U, @@ -5243,7 +5244,7 @@ (__v16si) _mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, @@ -5251,7 +5252,7 @@ (__v8di) __W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U, @@ -5259,7 +5260,7 @@ (__v8di) _mm512_setzero_si512 ()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, @@ -5267,7 +5268,7 @@ (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P, @@ -5276,21 +5277,21 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A) { __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A, (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_movedup_pd (__m512d __A) { return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A, 0, 0, 2, 2, 4, 4, 6, 6); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -5298,7 +5299,7 @@ (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -5471,14 +5472,14 @@ (__mmask8)-1, (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A, (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, @@ -5494,7 +5495,7 @@ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A, @@ -5516,14 +5517,14 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, (__v4sf) __B, (__v4sf) _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, @@ -5539,7 +5540,7 @@ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A, @@ -5645,7 +5646,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kmov (__mmask16 __A) { return __A; @@ -5664,13 +5665,13 @@ (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R)) #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi32(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5678,7 +5679,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5686,13 +5687,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sll_epi64(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5700,7 +5701,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5708,13 +5709,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5722,7 +5723,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5730,13 +5731,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sllv_epi64(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5744,7 +5745,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5752,13 +5753,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi32(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5766,7 +5767,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5774,13 +5775,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_sra_epi64(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5788,7 +5789,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5796,13 +5797,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5810,7 +5811,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5818,13 +5819,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srav_epi64(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5832,7 +5833,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5840,13 +5841,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi32(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5854,7 +5855,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5862,13 +5863,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srl_epi64(__m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5876,7 +5877,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5884,13 +5885,13 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi32(__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5898,7 +5899,7 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, @@ -5906,13 +5907,13 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srlv_epi64 (__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5920,7 +5921,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, @@ -5978,7 +5979,7 @@ #define _mm_cvt_roundsd_u32(A, R) \ (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R)) -static __inline__ unsigned __DEFAULT_FN_ATTRS +static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtsd_u32 (__m128d __A) { return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A, @@ -5990,7 +5991,7 @@ (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \ (int)(R)) -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvtsd_u64 (__m128d __A) { return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df) @@ -6016,7 +6017,7 @@ #define _mm_cvt_roundss_u32(A, R) \ (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R)) -static __inline__ unsigned __DEFAULT_FN_ATTRS +static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvtss_u32 (__m128 __A) { return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A, @@ -6028,7 +6029,7 @@ (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \ (int)(R)) -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvtss_u64 (__m128 __A) { return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf) @@ -6043,7 +6044,7 @@ #define _mm_cvtt_roundsd_si32(A, R) \ (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R)) -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttsd_i32 (__m128d __A) { return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A, @@ -6057,7 +6058,7 @@ #define _mm_cvtt_roundsd_i64(A, R) \ (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R)) -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttsd_i64 (__m128d __A) { return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A, @@ -6068,7 +6069,7 @@ #define _mm_cvtt_roundsd_u32(A, R) \ (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R)) -static __inline__ unsigned __DEFAULT_FN_ATTRS +static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttsd_u32 (__m128d __A) { return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A, @@ -6080,7 +6081,7 @@ (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \ (int)(R)) -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvttsd_u64 (__m128d __A) { return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df) @@ -6095,7 +6096,7 @@ #define _mm_cvtt_roundss_si32(A, R) \ (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R)) -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS128 _mm_cvttss_i32 (__m128 __A) { return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A, @@ -6109,7 +6110,7 @@ #define _mm_cvtt_roundss_si64(A, R) \ (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R)) -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS128 _mm_cvttss_i64 (__m128 __A) { return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A, @@ -6120,7 +6121,7 @@ #define _mm_cvtt_roundss_u32(A, R) \ (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R)) -static __inline__ unsigned __DEFAULT_FN_ATTRS +static __inline__ unsigned __DEFAULT_FN_ATTRS128 _mm_cvttss_u32 (__m128 __A) { return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A, @@ -6132,7 +6133,7 @@ (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \ (int)(R)) -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS128 _mm_cvttss_u64 (__m128 __A) { return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf) @@ -6167,13 +6168,13 @@ (__v16sf)_mm512_permute_ps((X), (C)), \ (__v16sf)_mm512_setzero_ps()) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutevar_pd(__m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -6181,7 +6182,7 @@ (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -6189,13 +6190,13 @@ (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutevar_ps(__m512 __A, __m512i __C) { return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -6203,7 +6204,7 @@ (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -6211,14 +6212,14 @@ (__v16sf)_mm512_setzero_ps()); } -static __inline __m512d __DEFAULT_FN_ATTRS +static __inline __m512d __DEFAULT_FN_ATTRS512 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B) { return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I, (__v8df)__B); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B) { return (__m512d)__builtin_ia32_selectpd_512(__U, @@ -6226,7 +6227,7 @@ (__v8df)__A); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U, __m512d __B) { @@ -6235,7 +6236,7 @@ (__v8df)(__m512d)__I); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I, __m512d __B) { @@ -6244,14 +6245,14 @@ (__v8df)_mm512_setzero_pd()); } -static __inline __m512 __DEFAULT_FN_ATTRS +static __inline __m512 __DEFAULT_FN_ATTRS512 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B) { return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I, (__v16sf) __B); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, @@ -6259,7 +6260,7 @@ (__v16sf)__A); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, @@ -6267,7 +6268,7 @@ (__v16sf)(__m512)__I); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B) { return (__m512)__builtin_ia32_selectps_512(__U, @@ -6291,7 +6292,7 @@ (__v8si)_mm256_setzero_si256(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvttpd_epu32 (__m512d __A) { return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, @@ -6301,7 +6302,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, @@ -6310,7 +6311,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A) { return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A, @@ -6422,7 +6423,7 @@ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_scalef_pd (__m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, @@ -6433,7 +6434,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, @@ -6443,7 +6444,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B) { return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A, @@ -6472,7 +6473,7 @@ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_scalef_ps (__m512 __A, __m512 __B) { return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, @@ -6483,7 +6484,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, @@ -6493,7 +6494,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B) { return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A, @@ -6510,7 +6511,7 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_sd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A, @@ -6519,7 +6520,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, @@ -6535,7 +6536,7 @@ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A, @@ -6557,7 +6558,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ss (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A, @@ -6566,7 +6567,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, @@ -6582,7 +6583,7 @@ (__v4sf)(__m128)(W), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A, @@ -6599,13 +6600,13 @@ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi32(__m512i __A, int __B) { return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ @@ -6613,20 +6614,20 @@ (__v16si)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \ (__v16si)_mm512_srai_epi32(__A, __B), \ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_srai_epi64(__m512i __A, int __B) { return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ @@ -6634,7 +6635,7 @@ (__v8di)__W); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \ @@ -6756,7 +6757,7 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)-1, (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, @@ -6772,7 +6773,7 @@ (__v2df)(__m128d)(W), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A, @@ -6794,7 +6795,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)-1, (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, @@ -6810,7 +6811,7 @@ (__v4sf)(__m128)(W), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A, @@ -6826,7 +6827,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_broadcast_f32x4(__m128 __A) { return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, @@ -6834,7 +6835,7 @@ 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, @@ -6842,7 +6843,7 @@ (__v16sf)__O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__M, @@ -6850,14 +6851,14 @@ (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_broadcast_f64x4(__m256d __A) { return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, @@ -6865,7 +6866,7 @@ (__v8df)__O); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M, @@ -6873,7 +6874,7 @@ (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i32x4(__m128i __A) { return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, @@ -6881,7 +6882,7 @@ 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -6889,7 +6890,7 @@ (__v16si)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -6897,14 +6898,14 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_broadcast_i64x4(__m256i __A) { return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -6912,7 +6913,7 @@ (__v8di)__O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -6920,7 +6921,7 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512(__M, @@ -6928,7 +6929,7 @@ (__v8df) __O); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) { return (__m512d)__builtin_ia32_selectpd_512(__M, @@ -6936,7 +6937,7 @@ (__v8df) _mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, @@ -6944,7 +6945,7 @@ (__v16sf) __O); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A) { return (__m512)__builtin_ia32_selectps_512(__M, @@ -6952,7 +6953,7 @@ (__v16sf) _mm512_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, @@ -6960,14 +6961,14 @@ (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A, @@ -6975,13 +6976,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi32_epi16 (__m512i __A) { return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, @@ -6989,14 +6990,14 @@ (__mmask16) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, (__v16hi) __O, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A, @@ -7004,13 +7005,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, @@ -7018,14 +7019,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A, @@ -7033,13 +7034,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi32 (__m512i __A) { return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, @@ -7047,14 +7048,14 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, (__v8si) __O, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A, @@ -7062,13 +7063,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtsepi64_epi16 (__m512i __A) { return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, @@ -7076,14 +7077,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A, @@ -7091,13 +7092,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, @@ -7105,7 +7106,7 @@ (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, @@ -7113,7 +7114,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A, @@ -7121,13 +7122,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi32_epi16 (__m512i __A) { return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, @@ -7135,7 +7136,7 @@ (__mmask16) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, @@ -7143,7 +7144,7 @@ __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A, @@ -7151,13 +7152,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, @@ -7165,7 +7166,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, @@ -7173,7 +7174,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A, @@ -7181,13 +7182,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi32 (__m512i __A) { return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, @@ -7195,14 +7196,14 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, (__v8si) __O, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A, @@ -7210,13 +7211,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtusepi64_epi16 (__m512i __A) { return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, @@ -7224,14 +7225,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A, @@ -7239,13 +7240,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, @@ -7253,14 +7254,14 @@ (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A, @@ -7268,13 +7269,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi32_epi16 (__m512i __A) { return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, @@ -7282,14 +7283,14 @@ (__mmask16) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, (__v16hi) __O, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A, @@ -7297,13 +7298,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A) { __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi8 (__m512i __A) { return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, @@ -7311,14 +7312,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A, @@ -7326,13 +7327,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi32 (__m512i __A) { return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, @@ -7340,14 +7341,14 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, (__v8si) __O, __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A) { return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A, @@ -7355,13 +7356,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_cvtepi64_epi16 (__m512i __A) { return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, @@ -7369,14 +7370,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A) { return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A, @@ -7384,7 +7385,7 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A) { __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M); @@ -7569,7 +7570,7 @@ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_getexp_pd (__m512d __A) { return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, @@ -7578,7 +7579,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, @@ -7587,7 +7588,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A, @@ -7611,7 +7612,7 @@ (__v16sf)_mm512_setzero_ps(), \ (__mmask16)(U), (int)(R)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_getexp_ps (__m512 __A) { return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, @@ -7620,7 +7621,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, @@ -7629,7 +7630,7 @@ _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A, @@ -7814,7 +7815,7 @@ (__v8si)(__m256i)(index), \ (__v8di)(__m512i)(v1), (int)(scale)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -7830,7 +7831,7 @@ (__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A, @@ -7846,7 +7847,7 @@ (__v4sf)(__m128)(C), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -7862,7 +7863,7 @@ (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -7878,7 +7879,7 @@ -(__v4sf)(__m128)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A, @@ -7894,7 +7895,7 @@ -(__v4sf)(__m128)(C), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -7910,7 +7911,7 @@ (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -7926,7 +7927,7 @@ (__v4sf)(__m128)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A, @@ -7942,7 +7943,7 @@ (__v4sf)(__m128)(C), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -7958,7 +7959,7 @@ (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -7974,7 +7975,7 @@ -(__v4sf)(__m128)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A, @@ -7990,7 +7991,7 @@ -(__v4sf)(__m128)(C), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U) { __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W, @@ -8006,7 +8007,7 @@ (__v4sf)(__m128)(Y), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8022,7 +8023,7 @@ (__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A, @@ -8038,7 +8039,7 @@ (__v2df)(__m128d)(C), (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8054,7 +8055,7 @@ (__v2df)(__m128d)(Y), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8070,7 +8071,7 @@ -(__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A, @@ -8086,7 +8087,7 @@ -(__v2df)(__m128d)(C), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8102,7 +8103,7 @@ (__v2df)(__m128d)(Y), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8118,7 +8119,7 @@ (__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A, @@ -8134,7 +8135,7 @@ (__v2df)(__m128d)(C), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8150,7 +8151,7 @@ (__v2df)(__m128d)(Y), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8166,7 +8167,7 @@ -(__v2df)(__m128d)(B), (__mmask8)(U), \ (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A, @@ -8183,7 +8184,7 @@ (__mmask8)(U), \ _MM_FROUND_CUR_DIRECTION) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U) { __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W, @@ -8225,13 +8226,13 @@ (__v8di)_mm512_permutex_epi64((X), (C)), \ (__v8di)_mm512_setzero_si512()) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_permutexvar_pd (__m512i __X, __m512d __Y) { return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -8239,7 +8240,7 @@ (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -8247,13 +8248,13 @@ (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M, @@ -8261,7 +8262,7 @@ (__v8di)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y) { @@ -8270,13 +8271,13 @@ (__v8di)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_permutexvar_ps (__m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -8284,7 +8285,7 @@ (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -8292,7 +8293,7 @@ (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X); @@ -8300,7 +8301,7 @@ #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32 -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y) { return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M, @@ -8308,7 +8309,7 @@ (__v16si)_mm512_setzero_si512()); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X, __m512i __Y) { @@ -8319,83 +8320,83 @@ #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32 -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kand (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kandn (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kor (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_kortestc (__mmask16 __A, __mmask16 __B) { return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_kortestz (__mmask16 __A, __mmask16 __B) { return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kunpackb (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kxnor (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_kxor (__mmask16 __A, __mmask16 __B) { return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_si512 (__m512i * __P, __m512i __A) { typedef __v8di __v8di_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_stream_load_si512 (void const *__P) { typedef __v8di __v8di_aligned __attribute__((aligned(64))); return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_pd (double *__P, __m512d __A) { typedef __v8df __v8df_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_stream_ps (float *__P, __m512 __A) { typedef __v16sf __v16sf_aligned __attribute__((aligned(64))); __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, @@ -8403,7 +8404,7 @@ (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A, @@ -8412,7 +8413,7 @@ (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, @@ -8420,7 +8421,7 @@ (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A, @@ -8429,7 +8430,7 @@ (__mmask8) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, @@ -8437,7 +8438,7 @@ (__mmask16) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A, @@ -8446,7 +8447,7 @@ (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, @@ -8454,7 +8455,7 @@ (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A, @@ -8509,70 +8510,70 @@ /* Bit Test */ -static __inline __mmask16 __DEFAULT_FN_ATTRS +static __inline __mmask16 __DEFAULT_FN_ATTRS512 _mm512_test_epi32_mask (__m512i __A, __m512i __B) { return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B), _mm512_setzero_si512()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } -static __inline __mmask8 __DEFAULT_FN_ATTRS +static __inline __mmask8 __DEFAULT_FN_ATTRS512 _mm512_test_epi64_mask (__m512i __A, __m512i __B) { return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_testn_epi32_mask (__m512i __A, __m512i __B) { return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_testn_epi64_mask (__m512i __A, __m512i __B) { return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS512 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B) { return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B), _mm512_setzero_si512()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_movehdup_ps (__m512 __A) { return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -8580,7 +8581,7 @@ (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -8588,14 +8589,14 @@ (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_moveldup_ps (__m512 __A) { return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A, 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -8603,7 +8604,7 @@ (__v16sf)__W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A) { return (__m512)__builtin_ia32_selectps_512((__mmask16)__U, @@ -8611,7 +8612,7 @@ (__v16sf)_mm512_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { __m128 res = __A; @@ -8619,7 +8620,7 @@ return res; } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B) { __m128 res = __A; @@ -8627,7 +8628,7 @@ return res; } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { __m128d res = __A; @@ -8635,7 +8636,7 @@ return res; } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B) { __m128d res = __A; @@ -8643,19 +8644,19 @@ return res; } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A) { __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A) { __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A) { __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W, @@ -8665,7 +8666,7 @@ return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ss (__mmask8 __U, const float* __A) { return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A, @@ -8673,7 +8674,7 @@ __U & 1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A) { __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W, @@ -8683,7 +8684,7 @@ return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_sd (__mmask8 __U, const double* __A) { return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, @@ -8704,7 +8705,7 @@ (__v16si)_mm512_shuffle_epi32((A), (I)), \ (__v16si)_mm512_setzero_si512()) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, @@ -8712,7 +8713,7 @@ (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A, @@ -8720,7 +8721,7 @@ (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, @@ -8728,7 +8729,7 @@ (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A) { return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A, @@ -8736,7 +8737,7 @@ (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, @@ -8744,7 +8745,7 @@ (__mmask8) __U); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P) { return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P, @@ -8752,7 +8753,7 @@ (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, @@ -8760,7 +8761,7 @@ (__mmask8) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P, @@ -8768,7 +8769,7 @@ (__mmask8) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, @@ -8776,7 +8777,7 @@ (__mmask16) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P) { return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P, @@ -8784,7 +8785,7 @@ (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, @@ -8792,7 +8793,7 @@ (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P) { return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P, @@ -8800,7 +8801,7 @@ (__mmask16) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, @@ -8808,7 +8809,7 @@ (__mmask16) __U); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A, @@ -8816,7 +8817,7 @@ (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, @@ -8824,7 +8825,7 @@ (__mmask16) __U); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A) { return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A, @@ -8847,13 +8848,13 @@ (__v8df)_mm512_setzero_pd(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_cvtps_pd (__m256 __A) { return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -8861,7 +8862,7 @@ (__v8df)__W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A) { return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U, @@ -8869,19 +8870,19 @@ (__v8df)_mm512_setzero_pd()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_cvtpslo_pd (__m512 __A) { return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A)); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A) { return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A)); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, @@ -8889,7 +8890,7 @@ (__v8df) __W); } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A) { return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U, @@ -8897,7 +8898,7 @@ (__v8df) _mm512_setzero_pd ()); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, @@ -8905,7 +8906,7 @@ (__v16sf) __W); } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A) { return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U, @@ -8913,28 +8914,28 @@ (__v16sf) _mm512_setzero_ps ()); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A) { __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A) { __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A) { __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A, (__mmask16) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS512 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A) { __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A, @@ -8959,7 +8960,7 @@ (__v4sf)_mm_setzero_ps(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B) { return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, @@ -8968,7 +8969,7 @@ (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B) { return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A, @@ -9032,7 +9033,7 @@ (__v2df)_mm_setzero_pd(), \ (__mmask8)(U), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B) { return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, @@ -9041,7 +9042,7 @@ (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B) { return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A, @@ -9050,7 +9051,7 @@ (__mmask8)__U, _MM_FROUND_CUR_DIRECTION); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu32_sd (__m128d __A, unsigned __B) { __A[0] = __B; @@ -9062,7 +9063,7 @@ (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \ (unsigned long long)(B), (int)(R)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtu64_sd (__m128d __A, unsigned long long __B) { __A[0] = __B; @@ -9074,7 +9075,7 @@ (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \ (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu32_ss (__m128 __A, unsigned __B) { __A[0] = __B; @@ -9086,7 +9087,7 @@ (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \ (unsigned long long)(B), (int)(R)) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtu64_ss (__m128 __A, unsigned long long __B) { __A[0] = __B; @@ -9094,7 +9095,7 @@ } #endif -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A) { return (__m512i) __builtin_ia32_selectd_512(__M, @@ -9102,7 +9103,7 @@ (__v16si) __O); } -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A) { return (__m512i) __builtin_ia32_selectq_512(__M, @@ -9110,7 +9111,7 @@ (__v8di) __O); } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59, char __e58, char __e57, char __e56, char __e55, char __e54, char __e53, char __e52, char __e51, char __e50, char __e49, char __e48, char __e47, @@ -9134,7 +9135,7 @@ __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63}; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28, short __e27, short __e26, short __e25, short __e24, short __e23, short __e22, short __e21, short __e20, short __e19, short __e18, @@ -9149,7 +9150,7 @@ __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 }; } -static __inline __m512i __DEFAULT_FN_ATTRS +static __inline __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi32 (int __A, int __B, int __C, int __D, int __E, int __F, int __G, int __H, int __I, int __J, int __K, int __L, @@ -9165,7 +9166,7 @@ _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \ (e5),(e4),(e3),(e2),(e1),(e0)) -static __inline__ __m512i __DEFAULT_FN_ATTRS +static __inline__ __m512i __DEFAULT_FN_ATTRS512 _mm512_set_epi64 (long long __A, long long __B, long long __C, long long __D, long long __E, long long __F, long long __G, long long __H) @@ -9177,7 +9178,7 @@ #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7) \ _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_set_pd (double __A, double __B, double __C, double __D, double __E, double __F, double __G, double __H) { @@ -9188,7 +9189,7 @@ #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7) \ _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_set_ps (float __A, float __B, float __C, float __D, float __E, float __F, float __G, float __H, float __I, float __J, float __K, float __L, @@ -9203,25 +9204,25 @@ _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \ (e4),(e3),(e2),(e1),(e0)) -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_abs_ps(__m512 __A) { return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; } -static __inline__ __m512 __DEFAULT_FN_ATTRS +static __inline__ __m512 __DEFAULT_FN_ATTRS512 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A) { return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ; } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_abs_pd(__m512d __A) { return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ; } -static __inline__ __m512d __DEFAULT_FN_ATTRS +static __inline__ __m512d __DEFAULT_FN_ATTRS512 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A) { return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A); @@ -9248,41 +9249,41 @@ __v2du __t8 = __t6 op __t7; \ return __t8[0]; -static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) { +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) { _mm512_mask_reduce_operator(+); } -static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) { +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) { _mm512_mask_reduce_operator(*); } -static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) { +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) { _mm512_mask_reduce_operator(&); } -static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) { +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) { _mm512_mask_reduce_operator(|); } -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_maskz_mov_epi64(__M, __W); _mm512_mask_reduce_operator(+); } -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W); _mm512_mask_reduce_operator(*); } -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __W); _mm512_mask_reduce_operator(&); } -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) { __W = _mm512_maskz_mov_epi64(__M, __W); _mm512_mask_reduce_operator(|); @@ -9300,21 +9301,21 @@ __m128d __t8 = __t6 op __t7; \ return __t8[0]; -static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) { +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) { _mm512_mask_reduce_operator(+); } -static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) { +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) { _mm512_mask_reduce_operator(*); } -static __inline__ double __DEFAULT_FN_ATTRS +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) { __W = _mm512_maskz_mov_pd(__M, __W); _mm512_mask_reduce_operator(+); } -static __inline__ double __DEFAULT_FN_ATTRS +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) { __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W); _mm512_mask_reduce_operator(*); @@ -9334,45 +9335,45 @@ __v4su __t10 = __t8 op __t9; \ return __t10[0]; -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi32(__m512i __W) { _mm512_mask_reduce_operator(+); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi32(__m512i __W) { _mm512_mask_reduce_operator(*); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi32(__m512i __W) { _mm512_mask_reduce_operator(&); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi32(__m512i __W) { _mm512_mask_reduce_operator(|); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) { __W = _mm512_maskz_mov_epi32(__M, __W); _mm512_mask_reduce_operator(+); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) { __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W); _mm512_mask_reduce_operator(*); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) { __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __W); _mm512_mask_reduce_operator(&); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) { __W = _mm512_maskz_mov_epi32(__M, __W); _mm512_mask_reduce_operator(|); @@ -9392,23 +9393,23 @@ __m128 __t10 = __t8 op __t9; \ return __t10[0]; -static __inline__ float __DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_add_ps(__m512 __W) { _mm512_mask_reduce_operator(+); } -static __inline__ float __DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_ps(__m512 __W) { _mm512_mask_reduce_operator(*); } -static __inline__ float __DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) { __W = _mm512_maskz_mov_ps(__M, __W); _mm512_mask_reduce_operator(+); } -static __inline__ float __DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) { __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W); _mm512_mask_reduce_operator(*); @@ -9424,45 +9425,45 @@ __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \ return __t6[0]; -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi64(__m512i __V) { _mm512_mask_reduce_operator(max_epi64); } -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu64(__m512i __V) { _mm512_mask_reduce_operator(max_epu64); } -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi64(__m512i __V) { _mm512_mask_reduce_operator(min_epi64); } -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu64(__m512i __V) { _mm512_mask_reduce_operator(min_epu64); } -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V); _mm512_mask_reduce_operator(max_epi64); } -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) { __V = _mm512_maskz_mov_epi64(__M, __V); _mm512_mask_reduce_operator(max_epu64); } -static __inline__ long long __DEFAULT_FN_ATTRS +static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V); _mm512_mask_reduce_operator(min_epi64); } -static __inline__ unsigned long long __DEFAULT_FN_ATTRS +static __inline__ unsigned long long __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) { __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V); _mm512_mask_reduce_operator(min_epu64); @@ -9482,45 +9483,45 @@ __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \ return __t10[0]; -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epi32(__m512i __V) { _mm512_mask_reduce_operator(max_epi32); } -static __inline__ unsigned int __DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_max_epu32(__m512i __V) { _mm512_mask_reduce_operator(max_epu32); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epi32(__m512i __V) { _mm512_mask_reduce_operator(min_epi32); } -static __inline__ unsigned int __DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_reduce_min_epu32(__m512i __V) { _mm512_mask_reduce_operator(min_epu32); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) { __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V); _mm512_mask_reduce_operator(max_epi32); } -static __inline__ unsigned int __DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) { __V = _mm512_maskz_mov_epi32(__M, __V); _mm512_mask_reduce_operator(max_epu32); } -static __inline__ int __DEFAULT_FN_ATTRS +static __inline__ int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) { __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V); _mm512_mask_reduce_operator(min_epi32); } -static __inline__ unsigned int __DEFAULT_FN_ATTRS +static __inline__ unsigned int __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) { __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V); _mm512_mask_reduce_operator(min_epu32); @@ -9538,23 +9539,23 @@ __m128d __t8 = _mm_##op(__t6, __t7); \ return __t8[0]; -static __inline__ double __DEFAULT_FN_ATTRS +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_max_pd(__m512d __V) { _mm512_mask_reduce_operator(max_pd); } -static __inline__ double __DEFAULT_FN_ATTRS +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_min_pd(__m512d __V) { _mm512_mask_reduce_operator(min_pd); } -static __inline__ double __DEFAULT_FN_ATTRS +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) { __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V); _mm512_mask_reduce_operator(max_pd); } -static __inline__ double __DEFAULT_FN_ATTRS +static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) { __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V); _mm512_mask_reduce_operator(min_pd); @@ -9574,29 +9575,30 @@ __m128 __t10 = _mm_##op(__t8, __t9); \ return __t10[0]; -static __inline__ float __DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_max_ps(__m512 __V) { _mm512_mask_reduce_operator(max_ps); } -static __inline__ float __DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_reduce_min_ps(__m512 __V) { _mm512_mask_reduce_operator(min_ps); } -static __inline__ float __DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) { __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V); _mm512_mask_reduce_operator(max_ps); } -static __inline__ float __DEFAULT_FN_ATTRS +static __inline__ float __DEFAULT_FN_ATTRS512 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) { __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V); _mm512_mask_reduce_operator(min_ps); } #undef _mm512_mask_reduce_operator -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS512 +#undef __DEFAULT_FN_ATTRS128 #endif /* __AVX512FINTRIN_H */ Index: lib/Headers/avx512ifmaintrin.h =================================================================== --- lib/Headers/avx512ifmaintrin.h +++ lib/Headers/avx512ifmaintrin.h @@ -29,7 +29,7 @@ #define __IFMAINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z) Index: lib/Headers/avx512ifmavlintrin.h =================================================================== --- lib/Headers/avx512ifmavlintrin.h +++ lib/Headers/avx512ifmavlintrin.h @@ -29,18 +29,19 @@ #define __IFMAVLINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256))) -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di) __X, (__v2di) __Y, (__v2di) __Z); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -48,7 +49,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -56,14 +57,14 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y, (__v4di)__Z); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -71,7 +72,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -79,14 +80,14 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y, (__v2di)__Z); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -94,7 +95,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -102,14 +103,14 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y, (__v4di)__Z); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -117,7 +118,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -126,6 +127,7 @@ } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avx512vbmi2intrin.h =================================================================== --- lib/Headers/avx512vbmi2intrin.h +++ lib/Headers/avx512vbmi2intrin.h @@ -29,7 +29,7 @@ #define __AVX512VBMI2INTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS Index: lib/Headers/avx512vbmiintrin.h =================================================================== --- lib/Headers/avx512vbmiintrin.h +++ lib/Headers/avx512vbmiintrin.h @@ -29,7 +29,7 @@ #define __VBMIINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS Index: lib/Headers/avx512vbmivlintrin.h =================================================================== --- lib/Headers/avx512vbmivlintrin.h +++ lib/Headers/avx512vbmivlintrin.h @@ -29,10 +29,11 @@ #define __VBMIVLINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256))) -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A, @@ -40,7 +41,7 @@ (__v16qi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I, __m128i __B) { @@ -49,7 +50,7 @@ (__v16qi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U, __m128i __B) { @@ -58,7 +59,7 @@ (__v16qi)__I); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I, __m128i __B) { @@ -67,14 +68,14 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I, (__v32qi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I, __m256i __B) { @@ -83,7 +84,7 @@ (__v32qi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U, __m256i __B) { @@ -92,7 +93,7 @@ (__v32qi)__I); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I, __m256i __B) { @@ -101,13 +102,13 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi8 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -115,7 +116,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { @@ -124,13 +125,13 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi8 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A, __m256i __B) { @@ -139,7 +140,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { @@ -148,7 +149,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, @@ -157,7 +158,7 @@ (__mmask16) __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, @@ -167,7 +168,7 @@ (__mmask16) __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y) { return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X, @@ -177,7 +178,7 @@ (__mmask16) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, @@ -186,7 +187,7 @@ (__mmask32) __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, @@ -196,7 +197,7 @@ (__mmask32) __M); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y) { return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X, @@ -207,6 +208,7 @@ } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avx512vlbitalgintrin.h =================================================================== --- lib/Headers/avx512vlbitalgintrin.h +++ lib/Headers/avx512vlbitalgintrin.h @@ -29,15 +29,16 @@ #define __AVX512VLBITALGINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(256))) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi16(__m256i __A) { return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) { return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U, @@ -45,7 +46,7 @@ (__v16hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) { return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(), @@ -53,13 +54,13 @@ __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi16(__m128i __A) { return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, @@ -67,7 +68,7 @@ (__v8hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) { return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), @@ -75,13 +76,13 @@ __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_popcnt_epi8(__m256i __A) { return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) { return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U, @@ -89,7 +90,7 @@ (__v32qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) { return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(), @@ -97,13 +98,13 @@ __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_popcnt_epi8(__m128i __A) { return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U, @@ -111,7 +112,7 @@ (__v16qi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) { return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), @@ -119,7 +120,7 @@ __B); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_bitshuffle_epi64_mask(__mmask32 __U, __m256i __A, __m256i __B) { return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A, @@ -127,7 +128,7 @@ __U); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B) { return _mm256_mask_bitshuffle_epi64_mask((__mmask32) -1, @@ -135,7 +136,7 @@ __B); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B) { return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A, @@ -143,7 +144,7 @@ __U); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B) { return _mm_mask_bitshuffle_epi64_mask((__mmask16) -1, @@ -152,6 +153,7 @@ } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avx512vlbwintrin.h =================================================================== --- lib/Headers/avx512vlbwintrin.h +++ lib/Headers/avx512vlbwintrin.h @@ -29,7 +29,8 @@ #define __AVX512VLBWINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256))) /* Integer compare */ @@ -313,147 +314,147 @@ #define _mm256_mask_cmpneq_epu16_mask(k, A, B) \ _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){ return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_add_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_add_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_add_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_add_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_sub_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_sub_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sub_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_sub_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_add_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_add_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_add_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_add_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_sub_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_sub_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sub_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_sub_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mullo_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mullo_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mullo_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mullo_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, @@ -461,7 +462,7 @@ (__v16qi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, @@ -469,7 +470,7 @@ (__v32qi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, @@ -477,7 +478,7 @@ (__v8hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, @@ -485,7 +486,7 @@ (__v16hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -493,7 +494,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi8(__mmask16 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -501,7 +502,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -509,7 +510,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -517,7 +518,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -525,7 +526,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -533,7 +534,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -541,7 +542,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -549,14 +550,14 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, (__v8hi)_mm_packs_epi32(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -564,7 +565,7 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -572,7 +573,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -580,7 +581,7 @@ (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -588,7 +589,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -596,7 +597,7 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -604,7 +605,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -612,7 +613,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -620,7 +621,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -628,7 +629,7 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -636,7 +637,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -644,7 +645,7 @@ (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -652,7 +653,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -660,7 +661,7 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -668,7 +669,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -676,7 +677,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -684,7 +685,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -692,7 +693,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -700,7 +701,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -708,7 +709,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -716,7 +717,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -724,7 +725,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -732,7 +733,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -740,7 +741,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -748,7 +749,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -756,7 +757,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -764,7 +765,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -772,7 +773,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -780,7 +781,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -788,7 +789,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -796,7 +797,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -804,7 +805,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -812,7 +813,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -820,7 +821,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -828,7 +829,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -836,7 +837,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -844,7 +845,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -852,7 +853,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -860,7 +861,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -868,7 +869,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -876,7 +877,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -884,7 +885,7 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -892,7 +893,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -900,7 +901,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -908,7 +909,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -916,7 +917,7 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -924,7 +925,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -932,7 +933,7 @@ (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -940,7 +941,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -948,7 +949,7 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -956,7 +957,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -964,7 +965,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -972,7 +973,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -980,7 +981,7 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -988,7 +989,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -996,7 +997,7 @@ (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -1004,7 +1005,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -1012,7 +1013,7 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -1020,7 +1021,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -1028,7 +1029,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -1036,7 +1037,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -1044,7 +1045,7 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -1052,7 +1053,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -1060,7 +1061,7 @@ (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -1068,7 +1069,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, @@ -1076,7 +1077,7 @@ (__v16qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -1084,7 +1085,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M, @@ -1092,7 +1093,7 @@ (__v32qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -1100,7 +1101,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -1108,7 +1109,7 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -1116,7 +1117,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M, @@ -1124,7 +1125,7 @@ (__v16hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1132,7 +1133,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1140,7 +1141,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1148,7 +1149,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1156,7 +1157,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1164,7 +1165,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1172,7 +1173,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1180,7 +1181,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1188,7 +1189,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1196,7 +1197,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1204,7 +1205,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1212,7 +1213,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1220,7 +1221,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1228,7 +1229,7 @@ (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, @@ -1236,7 +1237,7 @@ (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1244,7 +1245,7 @@ (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, @@ -1252,7 +1253,7 @@ (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1260,7 +1261,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1268,7 +1269,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1276,7 +1277,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1284,14 +1285,14 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I, (__v8hi) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { @@ -1300,7 +1301,7 @@ (__v8hi)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { @@ -1309,7 +1310,7 @@ (__v8hi)__I); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { @@ -1318,14 +1319,14 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I, __m256i __B) { @@ -1334,7 +1335,7 @@ (__v16hi)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U, __m256i __B) { @@ -1343,7 +1344,7 @@ (__v16hi)__I); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I, __m256i __B) { @@ -1352,21 +1353,21 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_maddubs_epi16(__X, __Y), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_maddubs_epi16(__X, __Y), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1374,126 +1375,126 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_maddubs_epi16(__X, __Y), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_madd_epi16(__A, __B), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_madd_epi16(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_madd_epi16(__A, __B), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_madd_epi16(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi16_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi16_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi16_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi16_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi16_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, @@ -1501,273 +1502,273 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A, (__v16qi) _mm_setzero_si128(), __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi16_epi8 (__m256i __A) { return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm256_cvtepi16_epi8(__A), (__v16qi)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M, (__v16qi)_mm256_cvtepi16_epi8(__A), (__v16qi)_mm_setzero_si128()); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A) { __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhrs_epi16(__X, __Y), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhrs_epi16(__X, __Y), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epu16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epu16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epu16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_mulhi_epi16(__A, __B), (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_mulhi_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpackhi_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpackhi_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpackhi_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpackhi_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpackhi_epi16(__A, __B), (__v8hi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpackhi_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpackhi_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpacklo_epi8(__A, __B), (__v16qi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U, (__v16qi)_mm_unpacklo_epi8(__A, __B), (__v16qi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpacklo_epi8(__A, __B), (__v32qi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U, (__v32qi)_mm256_unpacklo_epi8(__A, __B), (__v32qi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpacklo_epi16(__A, __B), (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, (__v8hi)_mm_unpacklo_epi16(__A, __B), (__v8hi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpacklo_epi16(__A, __B), (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, (__v16hi)_mm256_unpacklo_epi16(__A, __B), (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1775,7 +1776,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1783,7 +1784,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1791,7 +1792,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1800,7 +1801,7 @@ } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1808,7 +1809,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1816,7 +1817,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1824,7 +1825,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1875,13 +1876,13 @@ (imm)), \ (__v16hi)_mm256_setzero_si256()) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sllv_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1889,7 +1890,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1897,13 +1898,13 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sllv_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1911,7 +1912,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1919,7 +1920,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1927,7 +1928,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1935,7 +1936,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1943,7 +1944,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1951,7 +1952,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1959,7 +1960,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -1967,7 +1968,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1975,7 +1976,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1983,13 +1984,13 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srlv_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -1997,7 +1998,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2005,13 +2006,13 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srlv_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2019,7 +2020,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2027,13 +2028,13 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi16(__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2041,7 +2042,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2049,13 +2050,13 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi16(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2063,7 +2064,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2071,7 +2072,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2079,7 +2080,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2087,7 +2088,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2095,7 +2096,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2103,7 +2104,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2111,7 +2112,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2119,7 +2120,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2127,7 +2128,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2135,7 +2136,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2143,7 +2144,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2151,7 +2152,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2159,7 +2160,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2167,7 +2168,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2175,7 +2176,7 @@ (__v8hi)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U, @@ -2183,7 +2184,7 @@ (__v8hi)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2191,7 +2192,7 @@ (__v16hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U, @@ -2199,7 +2200,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, @@ -2207,7 +2208,7 @@ (__v8hi) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U, @@ -2215,7 +2216,7 @@ (__v8hi) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, @@ -2223,7 +2224,7 @@ (__v16hi) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U, @@ -2231,7 +2232,7 @@ (__v16hi) _mm256_setzero_si256 ()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, @@ -2239,7 +2240,7 @@ (__v16qi) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U, @@ -2247,7 +2248,7 @@ (__v16qi) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, @@ -2255,7 +2256,7 @@ (__v32qi) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U, @@ -2264,7 +2265,7 @@ } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A) { return (__m128i) __builtin_ia32_selectb_128(__M, @@ -2272,7 +2273,7 @@ (__v16qi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi8 (__mmask16 __M, char __A) { return (__m128i) __builtin_ia32_selectb_128(__M, @@ -2280,7 +2281,7 @@ (__v16qi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A) { return (__m256i) __builtin_ia32_selectb_256(__M, @@ -2288,7 +2289,7 @@ (__v32qi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi8 (__mmask32 __M, char __A) { return (__m256i) __builtin_ia32_selectb_256(__M, @@ -2296,7 +2297,7 @@ (__v32qi) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P, @@ -2304,7 +2305,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P, @@ -2313,7 +2314,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P, @@ -2321,7 +2322,7 @@ (__mmask16) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P, @@ -2330,7 +2331,7 @@ (__mmask16) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P, @@ -2338,7 +2339,7 @@ (__mmask16) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P, @@ -2347,7 +2348,7 @@ (__mmask16) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P, @@ -2355,7 +2356,7 @@ (__mmask32) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P, @@ -2363,7 +2364,7 @@ _mm256_setzero_si256 (), (__mmask32) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedquhi128_mask ((__v8hi *) __P, @@ -2371,7 +2372,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A) { __builtin_ia32_storedquhi256_mask ((__v16hi *) __P, @@ -2379,7 +2380,7 @@ (__mmask16) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A) { __builtin_ia32_storedquqi128_mask ((__v16qi *) __P, @@ -2387,7 +2388,7 @@ (__mmask16) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A) { __builtin_ia32_storedquqi256_mask ((__v32qi *) __P, @@ -2395,162 +2396,162 @@ (__mmask32) __U); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_test_epi8_mask (__m128i __A, __m128i __B) { return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_test_epi8_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi16_mask (__m128i __A, __m128i __B) { return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_test_epi16_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256 ()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_testn_epi8_mask (__m128i __A, __m128i __B) { return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_testn_epi8_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi16_mask (__m128i __A, __m128i __B) { return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_testn_epi16_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS128 _mm_movepi8_mask (__m128i __A) { return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A); } -static __inline__ __mmask32 __DEFAULT_FN_ATTRS +static __inline__ __mmask32 __DEFAULT_FN_ATTRS256 _mm256_movepi8_mask (__m256i __A) { return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi16_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A); } -static __inline__ __mmask16 __DEFAULT_FN_ATTRS +static __inline__ __mmask16 __DEFAULT_FN_ATTRS256 _mm256_movepi16_mask (__m256i __A) { return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi8 (__mmask16 __A) { return (__m128i) __builtin_ia32_cvtmask2b128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi8 (__mmask32 __A) { return (__m256i) __builtin_ia32_cvtmask2b256 (__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi16 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2w128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi16 (__mmask16 __A) { return (__m256i) __builtin_ia32_cvtmask2w256 (__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128(__M, @@ -2558,7 +2559,7 @@ (__v16qi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectb_128(__M, @@ -2566,7 +2567,7 @@ (__v16qi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectb_256(__M, @@ -2574,7 +2575,7 @@ (__v32qi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectb_256(__M, @@ -2582,7 +2583,7 @@ (__v32qi) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128(__M, @@ -2590,7 +2591,7 @@ (__v8hi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectw_128(__M, @@ -2598,7 +2599,7 @@ (__v8hi) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256(__M, @@ -2606,7 +2607,7 @@ (__v16hi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectw_256(__M, @@ -2614,7 +2615,7 @@ (__v16hi) _mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A) { return (__m256i) __builtin_ia32_selectw_256 (__M, @@ -2622,7 +2623,7 @@ (__v16hi) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi16 (__mmask16 __M, short __A) { return (__m256i) __builtin_ia32_selectw_256(__M, @@ -2630,7 +2631,7 @@ (__v16hi) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A) { return (__m128i) __builtin_ia32_selectw_128(__M, @@ -2638,7 +2639,7 @@ (__v8hi) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi16 (__mmask8 __M, short __A) { return (__m128i) __builtin_ia32_selectw_128(__M, @@ -2646,13 +2647,13 @@ (__v8hi) _mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutexvar_epi16 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M, @@ -2660,7 +2661,7 @@ (__v8hi) _mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { @@ -2669,13 +2670,13 @@ (__v8hi)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi16 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A, __m256i __B) { @@ -2684,7 +2685,7 @@ (__v16hi)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A, __m256i __B) { @@ -2741,6 +2742,7 @@ (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \ (__v16hi)_mm256_setzero_si256()) -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __AVX512VLBWINTRIN_H */ Index: lib/Headers/avx512vlcdintrin.h =================================================================== --- lib/Headers/avx512vlcdintrin.h +++ lib/Headers/avx512vlcdintrin.h @@ -28,35 +28,36 @@ #define __AVX512VLCDINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256))) -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastmb_epi64 (__mmask8 __A) { return (__m128i) _mm_set1_epi64x((long long) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastmb_epi64 (__mmask8 __A) { return (__m256i) _mm256_set1_epi64x((long long)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcastmw_epi32 (__mmask16 __A) { return (__m128i) _mm_set1_epi32((int)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcastmw_epi32 (__mmask16 __A) { return (__m256i) _mm256_set1_epi32((int)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_conflict_epi64 (__m128i __A) { return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, @@ -64,7 +65,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, @@ -72,7 +73,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A, @@ -81,7 +82,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_conflict_epi64 (__m256i __A) { return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, @@ -89,7 +90,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, @@ -97,7 +98,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A, @@ -105,7 +106,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_conflict_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, @@ -113,7 +114,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, @@ -121,7 +122,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A, @@ -129,7 +130,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_conflict_epi32 (__m256i __A) { return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, @@ -137,7 +138,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, @@ -145,7 +146,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A, @@ -154,13 +155,13 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -168,7 +169,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -176,13 +177,13 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi32 (__m256i __A) { return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -190,7 +191,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -198,13 +199,13 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_lzcnt_epi64 (__m128i __A) { return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -212,7 +213,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -220,13 +221,13 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_lzcnt_epi64 (__m256i __A) { return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -234,7 +235,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -242,6 +243,7 @@ (__v4di)_mm256_setzero_si256()); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __AVX512VLCDINTRIN_H */ Index: lib/Headers/avx512vldqintrin.h =================================================================== --- lib/Headers/avx512vldqintrin.h +++ lib/Headers/avx512vldqintrin.h @@ -29,760 +29,761 @@ #define __AVX512VLDQINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256))) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mullo_epi64 (__m256i __A, __m256i __B) { return (__m256i) ((__v4du) __A * (__v4du) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_mullo_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mullo_epi64 (__m128i __A, __m128i __B) { return (__m128i) ((__v2du) __A * (__v2du) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_mullo_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_andnot_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_andnot_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_andnot_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_andnot_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_and_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_and_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_and_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_and_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_xor_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_xor_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_xor_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_xor_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_or_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_or_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_or_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_or_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epi64 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epi64 (__m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu64 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu64 (__m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epi64 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epi64 (__m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu64 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu64 (__m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepi64_pd (__m128i __A) { return (__m128d)__builtin_convertvector((__v2di)__A, __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepi64_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_pd (__m256i __A) { return (__m256d)__builtin_convertvector((__v4di)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepi64_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepi64_ps (__m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_ps (__m256i __A) { return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) { return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) { return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epi64 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epi64 (__m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu64 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu64 (__m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) { return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epi64 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epi64 (__m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu64 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A, (__v2di) _mm_setzero_si128(), (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu64 (__m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) { return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A, (__v4di) _mm256_setzero_si256(), (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu64_pd (__m128i __A) { return (__m128d)__builtin_convertvector((__v2du)__A, __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtepu64_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_pd (__m256i __A) { return (__m256d)__builtin_convertvector((__v4du)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtepu64_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu64_ps (__m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_cvtepu64_ps (__m256i __A) { return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, (__v4sf) _mm_setzero_ps(), (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) { return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) { return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A, (__v4sf) _mm_setzero_ps(), @@ -919,62 +920,62 @@ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U)) -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi32_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi32_mask (__m256i __A) { return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi32 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2d128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi32 (__mmask8 __A) { return (__m256i) __builtin_ia32_cvtmask2d256 (__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_movm_epi64 (__mmask8 __A) { return (__m128i) __builtin_ia32_cvtmask2q128 (__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_movm_epi64 (__mmask8 __A) { return (__m256i) __builtin_ia32_cvtmask2q256 (__A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_movepi64_mask (__m128i __A) { return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_movepi64_mask (__m256i __A) { return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x2 (__m128 __A) { return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, @@ -982,7 +983,7 @@ (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, @@ -990,14 +991,14 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_broadcast_f64x2(__m128d __A) { return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A, 0, 1, 0, 1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, @@ -1005,7 +1006,7 @@ (__v4df)__O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M, @@ -1013,14 +1014,14 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_broadcast_i32x2 (__m128i __A) { return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -1028,7 +1029,7 @@ (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -1036,14 +1037,14 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x2 (__m128i __A) { return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 0, 1, 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -1051,7 +1052,7 @@ (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -1059,14 +1060,14 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i64x2(__m128i __A) { return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A, 0, 1, 0, 1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -1074,7 +1075,7 @@ (__v4di)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -1178,6 +1179,7 @@ (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \ (__mmask8)-1) -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avx512vlintrin.h =================================================================== --- lib/Headers/avx512vlintrin.h +++ lib/Headers/avx512vlintrin.h @@ -28,7 +28,8 @@ #ifndef __AVX512VLINTRIN_H #define __AVX512VLINTRIN_H -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256))) /* Integer compare */ @@ -232,7 +233,7 @@ #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \ _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -240,7 +241,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -248,7 +249,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -256,7 +257,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -264,7 +265,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -272,7 +273,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -280,7 +281,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -288,7 +289,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -296,7 +297,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -304,7 +305,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -312,7 +313,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -320,7 +321,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -328,7 +329,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -336,7 +337,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -344,7 +345,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -352,7 +353,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -360,7 +361,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -368,7 +369,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -376,7 +377,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, @@ -384,7 +385,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, @@ -392,7 +393,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -400,7 +401,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -408,7 +409,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, @@ -416,7 +417,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, @@ -424,7 +425,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -432,7 +433,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -440,7 +441,7 @@ (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -448,7 +449,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -456,7 +457,7 @@ (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -464,13 +465,13 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -478,13 +479,13 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -492,14 +493,14 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -507,13 +508,13 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -521,13 +522,13 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -535,13 +536,13 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -549,13 +550,13 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -564,13 +565,13 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -578,13 +579,13 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -592,13 +593,13 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -606,14 +607,14 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -621,13 +622,13 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -635,13 +636,13 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -649,13 +650,13 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -663,13 +664,13 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { @@ -678,7 +679,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B); @@ -804,7 +805,7 @@ (__v2df)(__m128d)(b), (int)(p), \ (__mmask8)(m)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -814,7 +815,7 @@ (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -824,7 +825,7 @@ (__v2df) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -834,7 +835,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -844,7 +845,7 @@ (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -854,7 +855,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -864,7 +865,7 @@ (__v2df) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -874,7 +875,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -884,7 +885,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -894,7 +895,7 @@ (__v4df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -904,7 +905,7 @@ (__v4df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -914,7 +915,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -924,7 +925,7 @@ (__v4df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -934,7 +935,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -944,7 +945,7 @@ (__v4df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -954,7 +955,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -964,7 +965,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -974,7 +975,7 @@ (__v4sf) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -984,7 +985,7 @@ (__v4sf) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -994,7 +995,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1004,7 +1005,7 @@ (__v4sf) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1014,7 +1015,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1024,7 +1025,7 @@ (__v4sf) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1034,7 +1035,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1044,7 +1045,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1054,7 +1055,7 @@ (__v8sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1064,7 +1065,7 @@ (__v8sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1074,7 +1075,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1084,7 +1085,7 @@ (__v8sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1094,7 +1095,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1104,7 +1105,7 @@ (__v8sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1114,7 +1115,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1124,7 +1125,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1134,7 +1135,7 @@ (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1144,7 +1145,7 @@ (__v2df) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1154,7 +1155,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1164,7 +1165,7 @@ (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1174,7 +1175,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1184,7 +1185,7 @@ (__v4df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1194,7 +1195,7 @@ (__v4df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1204,7 +1205,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1214,7 +1215,7 @@ (__v4df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1224,7 +1225,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1234,7 +1235,7 @@ (__v4sf) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1244,7 +1245,7 @@ (__v4sf) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1254,7 +1255,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1264,7 +1265,7 @@ (__v4sf) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1274,7 +1275,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { @@ -1285,7 +1286,7 @@ (__v8sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1295,7 +1296,7 @@ (__v8sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1305,7 +1306,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1315,7 +1316,7 @@ (__v8sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1325,7 +1326,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1335,7 +1336,7 @@ (__v2df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1345,7 +1346,7 @@ (__v4df) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1355,7 +1356,7 @@ (__v4sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1365,7 +1366,7 @@ (__v8sf) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1375,7 +1376,7 @@ (__v2df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1385,7 +1386,7 @@ (__v4df) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1395,7 +1396,7 @@ (__v4sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1405,7 +1406,7 @@ (__v8sf) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1415,7 +1416,7 @@ (__v2df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1425,7 +1426,7 @@ (__v4df) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1435,7 +1436,7 @@ (__v4sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1445,7 +1446,7 @@ (__v8sf) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1455,7 +1456,7 @@ (__v2df) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U) { return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U, @@ -1465,7 +1466,7 @@ (__v2df) __C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1475,7 +1476,7 @@ (__v4df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U) { return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U, @@ -1485,7 +1486,7 @@ (__v4df) __C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1495,7 +1496,7 @@ (__v4sf) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U) { return (__m128) __builtin_ia32_selectps_128((__mmask8) __U, @@ -1505,7 +1506,7 @@ (__v4sf) __C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1515,7 +1516,7 @@ (__v8sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U) { return (__m256) __builtin_ia32_selectps_256((__mmask8) __U, @@ -1525,126 +1526,126 @@ (__v8sf) __C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_add_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_add_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_add_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_add_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_add_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_add_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_add_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_add_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, (__v4si) __W, (__v4si) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, (__v8si) __W, (__v8si) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, (__v2df) __W, (__v2df) __A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, (__v4df) __W, (__v4df) __A); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, (__v4sf) __W, (__v4sf) __A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, (__v8sf) __W, (__v8sf) __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, (__v2di) __W, (__v2di) __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, (__v4di) __W, (__v4di) __A); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, (__v2df) __W, (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A, (__v2df) @@ -1652,14 +1653,14 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, (__v4df) __W, (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A, (__v4df) @@ -1667,14 +1668,14 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A, (__v2di) @@ -1682,14 +1683,14 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A, (__v4di) @@ -1697,14 +1698,14 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A, (__v4sf) @@ -1712,14 +1713,14 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A, (__v8sf) @@ -1727,14 +1728,14 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A, (__v4si) @@ -1742,14 +1743,14 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, (__v8si) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A, (__v8si) @@ -1757,126 +1758,126 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) { __builtin_ia32_compressstoredf128_mask ((__v2df *) __P, (__v2df) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) { __builtin_ia32_compressstoredf256_mask ((__v4df *) __P, (__v4df) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_compressstoredi128_mask ((__v2di *) __P, (__v2di) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_compressstoredi256_mask ((__v4di *) __P, (__v4di) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) { __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P, (__v4sf) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) { __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P, (__v8sf) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_compressstoresi128_mask ((__v4si *) __P, (__v4si) __A, (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_compressstoresi256_mask ((__v8si *) __P, (__v8si) __A, (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepi32_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepi32_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepi32_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepi32_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepi32_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepi32_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepi32_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepi32_ps(__A), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A, (__v4si) @@ -1884,28 +1885,28 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm256_cvtpd_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm256_cvtpd_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) { return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) { return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A, (__v4sf) @@ -1913,21 +1914,21 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtpd_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm256_cvtpd_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtpd_epu32 (__m128d __A) { return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, (__v4si) @@ -1935,14 +1936,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A, (__v4si) @@ -1950,7 +1951,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtpd_epu32 (__m256d __A) { return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, (__v4si) @@ -1958,14 +1959,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) { return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A, (__v4si) @@ -1973,63 +1974,63 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtps_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvtps_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtps_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvtps_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtps_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_cvtps_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtps_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_cvtps_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtps_epu32 (__m128 __A) { return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, (__v4si) @@ -2037,14 +2038,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A, (__v4si) @@ -2052,7 +2053,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvtps_epu32 (__m256 __A) { return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, (__v8si) @@ -2060,14 +2061,14 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, (__v8si) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) { return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A, (__v8si) @@ -2075,14 +2076,14 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A, (__v4si) @@ -2090,21 +2091,21 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm256_cvttpd_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm256_cvttpd_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttpd_epu32 (__m128d __A) { return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, (__v4si) @@ -2112,14 +2113,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) { return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A, (__v4si) @@ -2127,7 +2128,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvttpd_epu32 (__m256d __A) { return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, (__v4si) @@ -2135,14 +2136,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) { return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) { return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A, (__v4si) @@ -2150,35 +2151,35 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvttps_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_cvttps_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvttps_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_cvttps_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvttps_epu32 (__m128 __A) { return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, (__v4si) @@ -2186,14 +2187,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A, (__v4si) @@ -2201,7 +2202,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cvttps_epu32 (__m256 __A) { return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, (__v8si) @@ -2209,14 +2210,14 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) { return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, (__v8si) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) { return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A, (__v8si) @@ -2224,147 +2225,147 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_cvtepu32_pd (__m128i __A) { return (__m128d) __builtin_convertvector( __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepu32_pd(__A), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U, (__v2df)_mm_cvtepu32_pd(__A), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_pd (__m128i __A) { return (__m256d)__builtin_convertvector((__v4su)__A, __v4df); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepu32_pd(__A), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U, (__v4df)_mm256_cvtepu32_pd(__A), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_cvtepu32_ps (__m128i __A) { return (__m128)__builtin_convertvector((__v4su)__A, __v4sf); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepu32_ps(__A), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_cvtepu32_ps(__A), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtepu32_ps (__m256i __A) { return (__m256)__builtin_convertvector((__v8su)__A, __v8sf); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepu32_ps(__A), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_cvtepu32_ps(__A), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_div_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_div_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_div_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_div_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_div_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_div_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_div_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_div_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, (__v2df) __W, (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A, (__v2df) @@ -2372,14 +2373,14 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, (__v4df) __W, (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A, (__v4df) @@ -2387,14 +2388,14 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, (__v2di) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A, (__v2di) @@ -2402,14 +2403,14 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, (__v4di) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A, (__v4di) @@ -2417,7 +2418,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, (__v2df) __W, @@ -2425,7 +2426,7 @@ __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P, (__v2df) @@ -2434,7 +2435,7 @@ __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, (__v4df) __W, @@ -2442,7 +2443,7 @@ __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P, (__v4df) @@ -2451,7 +2452,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, (__v2di) __W, @@ -2459,7 +2460,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P, (__v2di) @@ -2468,7 +2469,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, @@ -2477,7 +2478,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P, (__v4di) @@ -2486,14 +2487,14 @@ __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P, (__v4sf) @@ -2502,14 +2503,14 @@ __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, (__v8sf) __W, (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P, (__v8sf) @@ -2518,7 +2519,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, (__v4si) __W, @@ -2526,7 +2527,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P, (__v4si) @@ -2534,7 +2535,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, @@ -2543,7 +2544,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P, (__v8si) @@ -2552,14 +2553,14 @@ __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A, (__v4sf) @@ -2567,14 +2568,14 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A, (__v8sf) @@ -2582,14 +2583,14 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, (__v4si) __W, (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A, (__v4si) @@ -2597,14 +2598,14 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, (__v8si) __W, (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A, (__v8si) @@ -2612,7 +2613,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_getexp_pd (__m128d __A) { return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, (__v2df) @@ -2620,14 +2621,14 @@ (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, (__v2df) __W, (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A, (__v2df) @@ -2635,7 +2636,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_getexp_pd (__m256d __A) { return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, (__v4df) @@ -2643,14 +2644,14 @@ (__mmask8) -1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, (__v4df) __W, (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A, (__v4df) @@ -2658,7 +2659,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_getexp_ps (__m128 __A) { return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, (__v4sf) @@ -2666,14 +2667,14 @@ (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, (__v4sf) __W, (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A, (__v4sf) @@ -2681,7 +2682,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_getexp_ps (__m256 __A) { return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, (__v8sf) @@ -2689,14 +2690,14 @@ (__mmask8) -1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, (__v8sf) __W, (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A, (__v8sf) @@ -2704,498 +2705,498 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_max_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_max_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_max_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_max_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_max_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_max_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_max_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_max_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_min_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_min_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_min_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_min_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_min_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_min_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_min_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_min_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_mul_pd(__A, __B), (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_mul_pd(__A, __B), (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_mul_pd(__A, __B), (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_mul_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_mul_ps(__A, __B), (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_mul_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_mul_ps(__A, __B), (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_mul_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_abs_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, (__v4si)_mm_abs_epi32(__A), (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_abs_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, (__v8si)_mm256_abs_epi32(__A), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_abs_epi64 (__m128i __A) { return (__m128i)__builtin_ia32_pabsq128((__v2di)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_abs_epi64(__A), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, (__v2di)_mm_abs_epi64(__A), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_abs_epi64 (__m256i __A) { return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_abs_epi64(__A), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, (__v4di)_mm256_abs_epi64(__A), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epu32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_max_epu32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epu32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_max_epu32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_max_epu64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epu64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_max_epu64(__A, __B), (__v2di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_max_epu64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epu64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_max_epu64(__A, __B), (__v4di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epi32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epi32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epi32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epi32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epi64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epi64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epi64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epi64(__A, __B), (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epu32(__A, __B), (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, (__v4si)_mm_min_epu32(__A, __B), (__v4si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epu32(__A, __B), (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, (__v8si)_mm256_min_epu32(__A, __B), (__v8si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_min_epu64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epu64(__A, __B), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M, (__v2di)_mm_min_epu64(__A, __B), (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_min_epu64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epu64(__A, __B), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, (__v4di)_mm256_min_epu64(__A, __B), @@ -3276,7 +3277,7 @@ (__v8sf)_mm256_setzero_ps(), \ (__mmask8)(U)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_scalef_pd (__m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, (__v2df) __B, @@ -3285,7 +3286,7 @@ (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, @@ -3294,7 +3295,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A, (__v2df) __B, @@ -3303,7 +3304,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_scalef_pd (__m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, (__v4df) __B, @@ -3312,7 +3313,7 @@ (__mmask8) -1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, @@ -3321,7 +3322,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A, (__v4df) __B, @@ -3330,7 +3331,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_scalef_ps (__m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, (__v4sf) __B, @@ -3339,7 +3340,7 @@ (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, (__v4sf) __B, @@ -3347,7 +3348,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) { return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A, (__v4sf) __B, @@ -3356,7 +3357,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_scalef_ps (__m256 __A, __m256 __B) { return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, (__v8sf) __B, @@ -3365,7 +3366,7 @@ (__mmask8) -1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, @@ -3374,7 +3375,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) { return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A, (__v8sf) __B, @@ -3543,125 +3544,125 @@ (__v8si)(__m256i)(index), \ (__v8si)(__m256i)(v1), (int)(scale)) - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)__W); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sqrt_pd(__A), (__v2df)_mm_setzero_pd()); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)__W); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sqrt_pd(__A), (__v4df)_mm256_setzero_pd()); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)__W); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sqrt_ps(__A), (__v4sf)_mm_setzero_pd()); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), (__v8sf)__W); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sqrt_ps(__A), (__v8sf)_mm256_setzero_ps()); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sub_pd(__A, __B), (__v2df)__W); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, (__v2df)_mm_sub_pd(__A, __B), (__v2df)_mm_setzero_pd()); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sub_pd(__A, __B), (__v4df)__W); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, (__v4df)_mm256_sub_pd(__A, __B), (__v4df)_mm256_setzero_pd()); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sub_ps(__A, __B), (__v4sf)__W); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, (__v4sf)_mm_sub_ps(__A, __B), (__v4sf)_mm_setzero_ps()); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sub_ps(__A, __B), (__v8sf)__W); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, (__v8sf)_mm256_sub_ps(__A, __B), (__v8sf)_mm256_setzero_ps()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I, (__v4si)__B); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -3669,7 +3670,7 @@ (__v4si)__A); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -3677,7 +3678,7 @@ (__v4si)__I); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -3685,13 +3686,13 @@ (__v4si)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I, (__v8si) __B); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -3699,7 +3700,7 @@ (__v8si)__A); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -3707,7 +3708,7 @@ (__v8si)__I); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -3715,40 +3716,40 @@ (__v8si)_mm256_setzero_si256()); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) { return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I, (__v2df)__B); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128(__U, (__v2df)_mm_permutex2var_pd(__A, __I, __B), (__v2df)__A); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128(__U, (__v2df)_mm_permutex2var_pd(__A, __I, __B), (__v2df)(__m128d)__I); } - static __inline__ __m128d __DEFAULT_FN_ATTRS + static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128(__U, (__v2df)_mm_permutex2var_pd(__A, __I, __B), (__v2df)_mm_setzero_pd()); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) { return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I, (__v4df)__B); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256(__U, @@ -3756,7 +3757,7 @@ (__v4df)__A); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256(__U, @@ -3764,7 +3765,7 @@ (__v4df)(__m256d)__I); } - static __inline__ __m256d __DEFAULT_FN_ATTRS + static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256(__U, @@ -3772,47 +3773,47 @@ (__v4df)_mm256_setzero_pd()); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) { return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I, (__v4sf)__B); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) { return (__m128)__builtin_ia32_selectps_128(__U, (__v4sf)_mm_permutex2var_ps(__A, __I, __B), (__v4sf)__A); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) { return (__m128)__builtin_ia32_selectps_128(__U, (__v4sf)_mm_permutex2var_ps(__A, __I, __B), (__v4sf)(__m128)__I); } - static __inline__ __m128 __DEFAULT_FN_ATTRS + static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) { return (__m128)__builtin_ia32_selectps_128(__U, (__v4sf)_mm_permutex2var_ps(__A, __I, __B), (__v4sf)_mm_setzero_ps()); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) { return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I, (__v8sf) __B); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) { return (__m256)__builtin_ia32_selectps_256(__U, (__v8sf)_mm256_permutex2var_ps(__A, __I, __B), (__v8sf)__A); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U, __m256 __B) { return (__m256)__builtin_ia32_selectps_256(__U, @@ -3820,7 +3821,7 @@ (__v8sf)(__m256)__I); } - static __inline__ __m256 __DEFAULT_FN_ATTRS + static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I, __m256 __B) { return (__m256)__builtin_ia32_selectps_256(__U, @@ -3828,13 +3829,13 @@ (__v8sf)_mm256_setzero_ps()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I, (__v2di)__B); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -3842,7 +3843,7 @@ (__v2di)__A); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -3850,7 +3851,7 @@ (__v2di)__I); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -3859,13 +3860,13 @@ } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I, (__v4di) __B); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -3873,7 +3874,7 @@ (__v4di)__A); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -3881,7 +3882,7 @@ (__v4di)__I); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -3889,7 +3890,7 @@ (__v4di)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -3897,7 +3898,7 @@ (__v4si)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -3905,7 +3906,7 @@ (__v4si)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -3913,7 +3914,7 @@ (__v8si)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -3921,7 +3922,7 @@ (__v8si)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -3929,7 +3930,7 @@ (__v2di)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -3937,7 +3938,7 @@ (__v2di)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -3945,7 +3946,7 @@ (__v4di)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -3953,7 +3954,7 @@ (__v4di)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -3961,7 +3962,7 @@ (__v2di)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -3969,7 +3970,7 @@ (__v2di)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -3977,7 +3978,7 @@ (__v4di)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -3985,7 +3986,7 @@ (__v4di)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -3993,7 +3994,7 @@ (__v4si)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4001,7 +4002,7 @@ (__v4si)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4009,7 +4010,7 @@ (__v8si)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4017,7 +4018,7 @@ (__v8si)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4025,7 +4026,7 @@ (__v2di)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4033,7 +4034,7 @@ (__v2di)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4041,7 +4042,7 @@ (__v4di)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4050,7 +4051,7 @@ } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4058,7 +4059,7 @@ (__v4si)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4066,7 +4067,7 @@ (__v4si)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4074,7 +4075,7 @@ (__v8si)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4082,7 +4083,7 @@ (__v8si)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4090,7 +4091,7 @@ (__v2di)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4098,7 +4099,7 @@ (__v2di)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4106,7 +4107,7 @@ (__v4di)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4114,7 +4115,7 @@ (__v4di)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4122,7 +4123,7 @@ (__v2di)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4130,7 +4131,7 @@ (__v2di)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4138,7 +4139,7 @@ (__v4di)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4146,7 +4147,7 @@ (__v4di)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4154,7 +4155,7 @@ (__v4si)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4162,7 +4163,7 @@ (__v4si)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4170,7 +4171,7 @@ (__v8si)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4178,7 +4179,7 @@ (__v8si)_mm256_setzero_si256()); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4186,7 +4187,7 @@ (__v2di)__W); } - static __inline__ __m128i __DEFAULT_FN_ATTRS + static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4194,7 +4195,7 @@ (__v2di)_mm_setzero_si128()); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4202,7 +4203,7 @@ (__v4di)__W); } - static __inline__ __m256i __DEFAULT_FN_ATTRS + static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4263,13 +4264,13 @@ (__v4di)_mm256_rol_epi64((a), (b)), \ (__v4di)_mm256_setzero_si256()) -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -4277,7 +4278,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -4285,13 +4286,13 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -4299,7 +4300,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -4307,13 +4308,13 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rolv_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -4321,7 +4322,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -4329,13 +4330,13 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rolv_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -4343,7 +4344,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -4403,7 +4404,7 @@ (__v4di)_mm256_ror_epi64((a), (b)), \ (__v4di)_mm256_setzero_si256()) -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4411,7 +4412,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4419,7 +4420,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4427,7 +4428,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4435,7 +4436,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4443,7 +4444,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4451,7 +4452,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4459,7 +4460,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4467,7 +4468,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4475,7 +4476,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4483,7 +4484,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4491,7 +4492,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4499,7 +4500,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4507,7 +4508,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4515,7 +4516,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4523,7 +4524,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4531,13 +4532,13 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi32 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -4545,7 +4546,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -4553,13 +4554,13 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi32 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -4567,7 +4568,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -4575,13 +4576,13 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_rorv_epi64 (__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -4589,7 +4590,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128(__U, @@ -4597,13 +4598,13 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_rorv_epi64 (__m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -4611,7 +4612,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256(__U, @@ -4619,7 +4620,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4627,7 +4628,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4635,7 +4636,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4643,7 +4644,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4651,7 +4652,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4659,7 +4660,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4667,7 +4668,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4675,7 +4676,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4683,7 +4684,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4691,7 +4692,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4699,7 +4700,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4707,7 +4708,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4715,7 +4716,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4723,7 +4724,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4731,7 +4732,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4739,7 +4740,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4747,7 +4748,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4755,7 +4756,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4763,7 +4764,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4771,7 +4772,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4779,7 +4780,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4787,7 +4788,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4795,7 +4796,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4803,7 +4804,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4811,7 +4812,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4819,7 +4820,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4827,7 +4828,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4835,7 +4836,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4843,7 +4844,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4851,7 +4852,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4859,7 +4860,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4867,7 +4868,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4875,7 +4876,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4883,7 +4884,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -4891,7 +4892,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4899,7 +4900,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -4907,13 +4908,13 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srav_epi64(__m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4921,7 +4922,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -4929,13 +4930,13 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srav_epi64(__m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4943,7 +4944,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -4951,7 +4952,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, @@ -4959,7 +4960,7 @@ (__v4si) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U, @@ -4968,7 +4969,7 @@ } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, @@ -4976,7 +4977,7 @@ (__v8si) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U, @@ -4984,7 +4985,7 @@ (__v8si) _mm256_setzero_si256 ()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, @@ -4993,7 +4994,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P, @@ -5003,7 +5004,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, @@ -5012,7 +5013,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P, @@ -5022,7 +5023,7 @@ __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_movdqa32store128_mask ((__v4si *) __P, @@ -5030,7 +5031,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_movdqa32store256_mask ((__v8si *) __P, @@ -5038,7 +5039,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, @@ -5046,7 +5047,7 @@ (__v2di) __W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A) { return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U, @@ -5054,7 +5055,7 @@ (__v2di) _mm_setzero_si128 ()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, @@ -5062,7 +5063,7 @@ (__v4di) __W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A) { return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U, @@ -5070,7 +5071,7 @@ (__v4di) _mm256_setzero_si256 ()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, @@ -5079,7 +5080,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P, @@ -5089,7 +5090,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, @@ -5098,7 +5099,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P, @@ -5108,7 +5109,7 @@ __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_movdqa64store128_mask ((__v2di *) __P, @@ -5116,7 +5117,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_movdqa64store256_mask ((__v4di *) __P, @@ -5124,7 +5125,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5132,7 +5133,7 @@ (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5140,7 +5141,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5148,7 +5149,7 @@ (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5156,7 +5157,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A) { return (__m128i)__builtin_ia32_selectd_128(__M, @@ -5164,7 +5165,7 @@ (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi32( __mmask8 __M, int __A) { return (__m128i)__builtin_ia32_selectd_128(__M, @@ -5172,7 +5173,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A) { return (__m256i)__builtin_ia32_selectd_256(__M, @@ -5180,7 +5181,7 @@ (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi32( __mmask8 __M, int __A) { return (__m256i)__builtin_ia32_selectd_256(__M, @@ -5189,7 +5190,7 @@ } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, @@ -5197,7 +5198,7 @@ (__v2di) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A) { return (__m128i) __builtin_ia32_selectq_128(__M, @@ -5205,7 +5206,7 @@ (__v2di) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A) { return (__m256i) __builtin_ia32_selectq_256(__M, @@ -5213,7 +5214,7 @@ (__v4di) __O) ; } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A) { return (__m256i) __builtin_ia32_selectq_256(__M, @@ -5293,7 +5294,7 @@ (__v8si)(__m256i)(C), (int)(imm), \ (__mmask8)(U)) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, @@ -5301,7 +5302,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_load_pd (__mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P, @@ -5310,7 +5311,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, @@ -5318,7 +5319,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_load_pd (__mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P, @@ -5327,7 +5328,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, @@ -5335,7 +5336,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_load_ps (__mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P, @@ -5344,7 +5345,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, @@ -5352,7 +5353,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_load_ps (__mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P, @@ -5361,7 +5362,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, @@ -5369,7 +5370,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P, @@ -5378,7 +5379,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, @@ -5386,7 +5387,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P, @@ -5395,7 +5396,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, @@ -5403,7 +5404,7 @@ (__mmask8) __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P, @@ -5412,7 +5413,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, @@ -5420,7 +5421,7 @@ (__mmask8) __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P) { return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P, @@ -5429,7 +5430,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, @@ -5437,7 +5438,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P) { return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P, @@ -5446,7 +5447,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, @@ -5454,7 +5455,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P) { return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P, @@ -5463,7 +5464,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, @@ -5471,7 +5472,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P) { return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P, @@ -5480,7 +5481,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, @@ -5488,7 +5489,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P) { return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P, @@ -5497,7 +5498,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A) { __builtin_ia32_storeapd128_mask ((__v2df *) __P, @@ -5505,7 +5506,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A) { __builtin_ia32_storeapd256_mask ((__v4df *) __P, @@ -5513,7 +5514,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A) { __builtin_ia32_storeaps128_mask ((__v4sf *) __P, @@ -5521,7 +5522,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A) { __builtin_ia32_storeaps256_mask ((__v8sf *) __P, @@ -5529,7 +5530,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedqudi128_mask ((__v2di *) __P, @@ -5537,7 +5538,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_storedqudi256_mask ((__v4di *) __P, @@ -5545,7 +5546,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A) { __builtin_ia32_storedqusi128_mask ((__v4si *) __P, @@ -5553,7 +5554,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A) { __builtin_ia32_storedqusi256_mask ((__v8si *) __P, @@ -5561,7 +5562,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A) { __builtin_ia32_storeupd128_mask ((__v2df *) __P, @@ -5569,7 +5570,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A) { __builtin_ia32_storeupd256_mask ((__v4df *) __P, @@ -5577,7 +5578,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A) { __builtin_ia32_storeups128_mask ((__v4sf *) __P, @@ -5585,7 +5586,7 @@ (__mmask8) __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A) { __builtin_ia32_storeups256_mask ((__v8sf *) __P, @@ -5594,7 +5595,7 @@ } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5602,7 +5603,7 @@ (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5610,7 +5611,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5618,7 +5619,7 @@ (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5626,7 +5627,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5634,7 +5635,7 @@ (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5642,7 +5643,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5650,7 +5651,7 @@ (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5658,7 +5659,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5666,7 +5667,7 @@ (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5674,7 +5675,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5682,7 +5683,7 @@ (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5690,7 +5691,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5698,7 +5699,7 @@ (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5706,7 +5707,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5714,7 +5715,7 @@ (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5722,7 +5723,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rcp14_pd (__m128d __A) { return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, @@ -5731,7 +5732,7 @@ (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, @@ -5739,7 +5740,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A, @@ -5748,7 +5749,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rcp14_pd (__m256d __A) { return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, @@ -5757,7 +5758,7 @@ (__mmask8) -1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, @@ -5765,7 +5766,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A, @@ -5774,7 +5775,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rcp14_ps (__m128 __A) { return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, @@ -5783,7 +5784,7 @@ (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, @@ -5791,7 +5792,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A, @@ -5800,7 +5801,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rcp14_ps (__m256 __A) { return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, @@ -5809,7 +5810,7 @@ (__mmask8) -1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, @@ -5817,7 +5818,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A, @@ -5866,7 +5867,7 @@ (__v8sf)_mm256_permute_ps((X), (C)), \ (__v8sf)_mm256_setzero_ps()) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5874,7 +5875,7 @@ (__v2df)__W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C) { return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U, @@ -5882,7 +5883,7 @@ (__v2df)_mm_setzero_pd()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5890,7 +5891,7 @@ (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -5898,7 +5899,7 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5906,7 +5907,7 @@ (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -5914,7 +5915,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5922,7 +5923,7 @@ (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -5930,115 +5931,115 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi32_mask (__m128i __A, __m128i __B) { return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi32_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_test_epi64_mask (__m128i __A, __m128i __B) { return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_test_epi64_mask (__m256i __A, __m256i __B) { return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi32_mask (__m128i __A, __m128i __B) { return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi32_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_testn_epi64_mask (__m128i __A, __m128i __B) { return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS128 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B) { return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B), _mm_setzero_si128()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_testn_epi64_mask (__m256i __A, __m256i __B) { return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __mmask8 __DEFAULT_FN_ATTRS +static __inline__ __mmask8 __DEFAULT_FN_ATTRS256 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B) { return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B), _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6046,7 +6047,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6054,7 +6055,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6062,7 +6063,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6070,7 +6071,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -6078,7 +6079,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -6086,7 +6087,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -6094,7 +6095,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -6102,7 +6103,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6110,7 +6111,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6118,7 +6119,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6126,7 +6127,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6134,7 +6135,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -6142,7 +6143,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, @@ -6150,7 +6151,7 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -6158,7 +6159,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, @@ -6166,7 +6167,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6174,7 +6175,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6182,7 +6183,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6190,7 +6191,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6198,7 +6199,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6206,7 +6207,7 @@ (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U, @@ -6214,7 +6215,7 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6222,7 +6223,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U, @@ -6230,13 +6231,13 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_sra_epi64(__m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ @@ -6244,7 +6245,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ @@ -6252,13 +6253,13 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_sra_epi64(__m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ @@ -6266,7 +6267,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ @@ -6274,13 +6275,13 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_srai_epi64(__m128i __A, int __imm) { return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ @@ -6288,7 +6289,7 @@ (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm) { return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \ @@ -6296,13 +6297,13 @@ (__v2di)_mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_srai_epi64(__m256i __A, int __imm) { return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ @@ -6310,7 +6311,7 @@ (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \ @@ -6489,7 +6490,7 @@ (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \ (__v8sf)_mm256_setzero_ps()) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_rsqrt14_pd (__m128d __A) { return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, @@ -6498,7 +6499,7 @@ (__mmask8) -1); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, @@ -6506,7 +6507,7 @@ (__mmask8) __U); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A, @@ -6515,7 +6516,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_pd (__m256d __A) { return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, @@ -6524,7 +6525,7 @@ (__mmask8) -1); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, @@ -6532,7 +6533,7 @@ (__mmask8) __U); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A, @@ -6541,7 +6542,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_rsqrt14_ps (__m128 __A) { return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, @@ -6550,7 +6551,7 @@ (__mmask8) -1); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, @@ -6558,7 +6559,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A, @@ -6567,7 +6568,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_rsqrt14_ps (__m256 __A) { return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, @@ -6576,7 +6577,7 @@ (__mmask8) -1); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, @@ -6584,7 +6585,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A, @@ -6593,14 +6594,14 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_broadcast_f32x4(__m128 __A) { return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, @@ -6608,7 +6609,7 @@ (__v8sf)__O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__M, @@ -6616,14 +6617,14 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_broadcast_i32x4(__m128i __A) { return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A, 0, 1, 2, 3, 0, 1, 2, 3); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -6631,7 +6632,7 @@ (__v8si)__O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -6639,7 +6640,7 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, @@ -6647,7 +6648,7 @@ (__v4df) __O); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A) { return (__m256d)__builtin_ia32_selectpd_256(__M, @@ -6655,7 +6656,7 @@ (__v4df) _mm256_setzero_pd()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, @@ -6663,7 +6664,7 @@ (__v4sf) __O); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) { return (__m128)__builtin_ia32_selectps_128(__M, @@ -6671,7 +6672,7 @@ (__v4sf) _mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, @@ -6679,7 +6680,7 @@ (__v8sf) __O); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A) { return (__m256)__builtin_ia32_selectps_256(__M, @@ -6687,7 +6688,7 @@ (__v8sf) _mm256_setzero_ps()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, @@ -6695,7 +6696,7 @@ (__v4si) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128(__M, @@ -6703,7 +6704,7 @@ (__v4si) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, @@ -6711,7 +6712,7 @@ (__v8si) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectd_256(__M, @@ -6719,7 +6720,7 @@ (__v8si) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -6727,7 +6728,7 @@ (__v2di) __O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128(__M, @@ -6735,7 +6736,7 @@ (__v2di) _mm_setzero_si128()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -6743,7 +6744,7 @@ (__v4di) __O); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A) { return (__m256i)__builtin_ia32_selectq_256(__M, @@ -6751,7 +6752,7 @@ (__v4di) _mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, @@ -6759,14 +6760,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A, @@ -6774,13 +6775,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm256_cvtsepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, @@ -6788,14 +6789,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A, @@ -6803,13 +6804,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi32_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, @@ -6817,7 +6818,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, @@ -6825,7 +6826,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A, @@ -6833,13 +6834,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi32_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, @@ -6847,14 +6848,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A, @@ -6862,13 +6863,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, @@ -6876,14 +6877,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A, @@ -6891,13 +6892,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, @@ -6905,14 +6906,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A, @@ -6920,13 +6921,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, @@ -6934,14 +6935,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, (__v4si) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A, @@ -6949,13 +6950,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi32 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, @@ -6963,7 +6964,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, @@ -6971,7 +6972,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A, @@ -6979,13 +6980,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtsepi64_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, @@ -6993,14 +6994,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A, @@ -7008,13 +7009,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtsepi64_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, @@ -7022,14 +7023,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A, @@ -7037,13 +7038,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, @@ -7051,7 +7052,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, @@ -7059,7 +7060,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A, @@ -7067,13 +7068,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, @@ -7081,7 +7082,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, @@ -7089,7 +7090,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A, @@ -7097,13 +7098,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi32_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, @@ -7111,14 +7112,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A, @@ -7126,13 +7127,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi32_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, @@ -7140,14 +7141,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A, @@ -7155,13 +7156,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, @@ -7169,7 +7170,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, @@ -7177,7 +7178,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A, @@ -7185,13 +7186,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, @@ -7199,7 +7200,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, @@ -7207,7 +7208,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A, @@ -7215,13 +7216,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, @@ -7229,14 +7230,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, (__v4si) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A, @@ -7244,13 +7245,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi32 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, @@ -7258,14 +7259,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, (__v4si) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A, @@ -7273,13 +7274,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtusepi64_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, @@ -7287,14 +7288,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A, @@ -7302,13 +7303,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtusepi64_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, @@ -7316,14 +7317,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A, @@ -7331,13 +7332,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, @@ -7345,14 +7346,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A, @@ -7361,13 +7362,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, @@ -7375,14 +7376,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A, @@ -7390,13 +7391,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi32_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, @@ -7404,14 +7405,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A, @@ -7419,26 +7420,26 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi32_epi16 (__m256i __A) { return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A, @@ -7446,13 +7447,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi8 (__m128i __A) { return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, @@ -7460,14 +7461,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A, @@ -7475,13 +7476,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi8 (__m256i __A) { return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, @@ -7489,14 +7490,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, (__v16qi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A, @@ -7504,13 +7505,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi32 (__m128i __A) { return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, @@ -7518,14 +7519,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, (__v4si) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A, @@ -7533,19 +7534,19 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi32 (__m256i __A) { return (__m128i)__builtin_convertvector((__v4di)__A, __v4si); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -7553,7 +7554,7 @@ (__v4si)__O); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A) { return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M, @@ -7561,13 +7562,13 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_cvtepi64_epi16 (__m128i __A) { return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, @@ -7575,7 +7576,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, @@ -7583,7 +7584,7 @@ __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A) { return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A, @@ -7591,13 +7592,13 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A) { __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_cvtepi64_epi16 (__m256i __A) { return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, @@ -7605,14 +7606,14 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, (__v8hi) __O, __M); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A) { return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A, @@ -7620,7 +7621,7 @@ __M); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A) { __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M); @@ -7884,13 +7885,13 @@ (__v4di)_mm256_permutex_epi64((X), (C)), \ (__v4di)_mm256_setzero_si256()) -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_permutexvar_pd (__m256i __X, __m256d __Y) { return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X, __m256d __Y) { @@ -7899,7 +7900,7 @@ (__v4df)__W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y) { return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U, @@ -7907,13 +7908,13 @@ (__v4df)_mm256_setzero_pd()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M, @@ -7921,7 +7922,7 @@ (__v4di)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { @@ -7932,7 +7933,7 @@ #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A)) -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -7940,7 +7941,7 @@ (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -7950,7 +7951,7 @@ #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A)) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y) { @@ -7959,7 +7960,7 @@ (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y) { return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M, @@ -8023,7 +8024,7 @@ (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \ (__v4di)_mm256_setzero_si256()) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -8031,7 +8032,7 @@ (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -8039,7 +8040,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8047,7 +8048,7 @@ (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8055,7 +8056,7 @@ (__v8sf)_mm256_setzero_ps()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -8063,7 +8064,7 @@ (__v4sf)__W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A) { return (__m128)__builtin_ia32_selectps_128((__mmask8)__U, @@ -8071,7 +8072,7 @@ (__v4sf)_mm_setzero_ps()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8079,7 +8080,7 @@ (__v8sf)__W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A) { return (__m256)__builtin_ia32_selectps_256((__mmask8)__U, @@ -8107,7 +8108,7 @@ (__v4si)_mm_shuffle_epi32((A), (I)), \ (__v4si)_mm_setzero_si128()) -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, @@ -8115,7 +8116,7 @@ (__v2df) __W); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A) { return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U, @@ -8123,7 +8124,7 @@ (__v2df) _mm_setzero_pd ()); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, @@ -8131,7 +8132,7 @@ (__v4df) __W); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A) { return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U, @@ -8139,7 +8140,7 @@ (__v4df) _mm256_setzero_pd ()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, @@ -8147,7 +8148,7 @@ (__v4sf) __W); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A) { return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U, @@ -8155,7 +8156,7 @@ (__v4sf) _mm_setzero_ps ()); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, @@ -8163,7 +8164,7 @@ (__v8sf) __W); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A) { return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U, @@ -8171,7 +8172,7 @@ (__v8sf) _mm256_setzero_ps ()); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, @@ -8179,7 +8180,7 @@ (__mmask8) __U); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A) { return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A, @@ -8188,7 +8189,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A) { return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, @@ -8196,7 +8197,7 @@ (__mmask8) __U); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A) { return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A, @@ -8205,7 +8206,7 @@ (__mmask8) __U); } -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, @@ -8213,7 +8214,7 @@ (__mmask8) __U); } -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A) { return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION, @@ -8231,7 +8232,7 @@ (__v8hi)_mm_setzero_si128(), \ (__mmask8)(U)) -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS256 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A) { return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, @@ -8239,7 +8240,7 @@ (__mmask8) __U); } -static __inline __m128i __DEFAULT_FN_ATTRS +static __inline __m128i __DEFAULT_FN_ATTRS256 _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A) { return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION, @@ -8257,6 +8258,7 @@ (__mmask8)(U)) -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __AVX512VLINTRIN_H */ Index: lib/Headers/avx512vlvbmi2intrin.h =================================================================== --- lib/Headers/avx512vlvbmi2intrin.h +++ lib/Headers/avx512vlvbmi2intrin.h @@ -29,9 +29,10 @@ #define __AVX512VLVBMI2INTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256))) -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, @@ -39,7 +40,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi16(__mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D, @@ -47,7 +48,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, @@ -55,7 +56,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_compress_epi8(__mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D, @@ -63,21 +64,21 @@ __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D) { __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D, __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS128 _mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D) { __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D, __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, @@ -85,7 +86,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi16(__mmask8 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D, @@ -93,7 +94,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, @@ -101,7 +102,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expand_epi8(__mmask16 __U, __m128i __D) { return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D, @@ -109,7 +110,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, @@ -117,7 +118,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P, @@ -125,7 +126,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, @@ -133,7 +134,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P) { return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P, @@ -141,7 +142,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, @@ -149,7 +150,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D, @@ -157,7 +158,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, @@ -165,7 +166,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D, @@ -173,21 +174,21 @@ __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D) { __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D, __U); } -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __DEFAULT_FN_ATTRS256 _mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D) { __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D, __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, @@ -195,7 +196,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D, @@ -203,7 +204,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, @@ -211,7 +212,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D) { return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D, @@ -219,7 +220,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, @@ -227,7 +228,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P, @@ -235,7 +236,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, @@ -243,7 +244,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P) { return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P, @@ -419,7 +420,7 @@ (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \ (__v8hi)_mm_setzero_si128()) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, @@ -428,7 +429,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S, @@ -437,7 +438,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S, @@ -446,7 +447,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, @@ -455,7 +456,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S, @@ -464,7 +465,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S, @@ -473,7 +474,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, @@ -482,7 +483,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S, @@ -491,7 +492,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S, @@ -500,7 +501,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, @@ -509,7 +510,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S, @@ -518,7 +519,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S, @@ -527,7 +528,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, @@ -536,7 +537,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S, @@ -545,7 +546,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S, @@ -554,7 +555,7 @@ (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, @@ -563,7 +564,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S, @@ -572,7 +573,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S, @@ -581,7 +582,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, @@ -590,7 +591,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S, @@ -599,7 +600,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S, @@ -608,7 +609,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, @@ -617,7 +618,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S, @@ -626,7 +627,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S, @@ -635,7 +636,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, @@ -644,7 +645,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S, @@ -653,7 +654,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S, @@ -662,7 +663,7 @@ (__mmask8) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, @@ -671,7 +672,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S, @@ -680,7 +681,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S, @@ -689,7 +690,7 @@ (__mmask8) -1); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, @@ -698,7 +699,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S, @@ -707,7 +708,7 @@ __U); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S, @@ -716,7 +717,7 @@ (__mmask16) -1); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, @@ -725,7 +726,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S, @@ -734,7 +735,7 @@ __U); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S, @@ -744,6 +745,7 @@ } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avx512vlvnniintrin.h =================================================================== --- lib/Headers/avx512vlvnniintrin.h +++ lib/Headers/avx512vlvnniintrin.h @@ -29,17 +29,18 @@ #define __AVX512VLVNNIINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256))) -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A, (__v8si)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -47,7 +48,7 @@ (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -55,14 +56,14 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A, (__v8si)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -70,7 +71,7 @@ (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -78,14 +79,14 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A, (__v8si)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -93,7 +94,7 @@ (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -101,14 +102,14 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A, (__v8si)__B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -116,7 +117,7 @@ (__v8si)__S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B) { return (__m256i)__builtin_ia32_selectd_256(__U, @@ -124,14 +125,14 @@ (__v8si)_mm256_setzero_si256()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A, (__v4si)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -139,7 +140,7 @@ (__v4si)__S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -147,14 +148,14 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A, (__v4si)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -162,7 +163,7 @@ (__v4si)__S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -170,14 +171,14 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A, (__v4si)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -185,7 +186,7 @@ (__v4si)__S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -193,14 +194,14 @@ (__v4si)_mm_setzero_si128()); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A, (__v4si)__B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -208,7 +209,7 @@ (__v4si)__S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B) { return (__m128i)__builtin_ia32_selectd_128(__U, @@ -216,6 +217,7 @@ (__v4si)_mm_setzero_si128()); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avx512vnniintrin.h =================================================================== --- lib/Headers/avx512vnniintrin.h +++ lib/Headers/avx512vnniintrin.h @@ -29,7 +29,7 @@ #define __AVX512VNNIINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS Index: lib/Headers/avx512vpopcntdqintrin.h =================================================================== --- lib/Headers/avx512vpopcntdqintrin.h +++ lib/Headers/avx512vpopcntdqintrin.h @@ -31,8 +31,7 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntd" \ - "q"))) + __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq"), __min_vector_width__(512))) static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) { return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A); Index: lib/Headers/avx512vpopcntdqvlintrin.h =================================================================== --- lib/Headers/avx512vpopcntdqvlintrin.h +++ lib/Headers/avx512vpopcntdqvlintrin.h @@ -30,69 +30,76 @@ #define __AVX512VPOPCNTDQVLINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(256))) -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi64(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_popcnt_epi64(__m128i __A) { return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectq_128( (__mmask8)__U, (__v2di)_mm_popcnt_epi64(__A), (__v2di)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi32(__m128i __A) { +static __inline__ __m128i __DEFAULT_FN_ATTRS128 +_mm_popcnt_epi32(__m128i __A) { return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) { return (__m128i)__builtin_ia32_selectd_128( (__mmask8)__U, (__v4si)_mm_popcnt_epi32(__A), (__v4si)__W); } -static __inline__ __m128i __DEFAULT_FN_ATTRS +static __inline__ __m128i __DEFAULT_FN_ATTRS128 _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) { return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi64(__m256i __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_popcnt_epi64(__m256i __A) { return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectq_256( (__mmask8)__U, (__v4di)_mm256_popcnt_epi64(__A), (__v4di)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) { return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi32(__m256i __A) { +static __inline__ __m256i __DEFAULT_FN_ATTRS256 +_mm256_popcnt_epi32(__m256i __A) { return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) { return (__m256i)__builtin_ia32_selectd_256( (__mmask8)__U, (__v8si)_mm256_popcnt_epi32(__A), (__v8si)__W); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) { return _mm256_mask_popcnt_epi32((__m256i)_mm256_setzero_si256(), __U, __A); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif Index: lib/Headers/avxintrin.h =================================================================== --- lib/Headers/avxintrin.h +++ lib/Headers/avxintrin.h @@ -50,7 +50,8 @@ typedef long long __m256i __attribute__((__vector_size__(32))); /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(128))) /* Arithmetic */ /// Adds two 256-bit vectors of [4 x double]. @@ -780,7 +781,7 @@ /// 1: Bits [127:64] of the source are copied to bits [127:64] of the /// returned vector. /// \returns A 128-bit vector of [2 x double] containing the copied values. -static __inline __m128d __DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_permutevar_pd(__m128d __a, __m128i __c) { return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c); @@ -873,7 +874,7 @@ /// 11: Bits [127:96] of the source are copied to bits [127:96] of the /// returned vector. /// \returns A 128-bit vector of [4 x float] containing the copied values. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_permutevar_ps(__m128 __a, __m128i __c) { return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c); @@ -2497,7 +2498,7 @@ /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the ZF flag in the EFLAGS register. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testz_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b); @@ -2526,7 +2527,7 @@ /// \param __b /// A 128-bit vector of [2 x double]. /// \returns the CF flag in the EFLAGS register. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b); @@ -2556,7 +2557,7 @@ /// \param __b /// A 128-bit vector of [2 x double]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testnzc_pd(__m128d __a, __m128d __b) { return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b); @@ -2585,7 +2586,7 @@ /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the ZF flag. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testz_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b); @@ -2614,7 +2615,7 @@ /// \param __b /// A 128-bit vector of [4 x float]. /// \returns the CF flag. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testc_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b); @@ -2644,7 +2645,7 @@ /// \param __b /// A 128-bit vector of [4 x float]. /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0. -static __inline int __DEFAULT_FN_ATTRS +static __inline int __DEFAULT_FN_ATTRS128 _mm_testnzc_ps(__m128 __a, __m128 __b) { return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b); @@ -2948,7 +2949,7 @@ /// \headerfile /// /// This intrinsic corresponds to the VZEROALL instruction. -static __inline void __DEFAULT_FN_ATTRS +static __inline void __attribute__((__always_inline__, __nodebug__, __target__("avx"))) _mm256_zeroall(void) { __builtin_ia32_vzeroall(); @@ -2959,7 +2960,7 @@ /// \headerfile /// /// This intrinsic corresponds to the VZEROUPPER instruction. -static __inline void __DEFAULT_FN_ATTRS +static __inline void __attribute__((__always_inline__, __nodebug__, __target__("avx"))) _mm256_zeroupper(void) { __builtin_ia32_vzeroupper(); @@ -2978,7 +2979,7 @@ /// The single-precision floating point value to be broadcast. /// \returns A 128-bit vector of [4 x float] whose 32-bit elements are set /// equal to the broadcast value. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_broadcast_ss(float const *__a) { float __f = *__a; @@ -3327,7 +3328,7 @@ /// corresponding value in the memory location is not loaded and the /// corresponding field in the return value is set to zero. /// \returns A 128-bit vector of [2 x double] containing the loaded values. -static __inline __m128d __DEFAULT_FN_ATTRS +static __inline __m128d __DEFAULT_FN_ATTRS128 _mm_maskload_pd(double const *__p, __m128i __m) { return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m); @@ -3376,7 +3377,7 @@ /// corresponding value in the memory location is not loaded and the /// corresponding field in the return value is set to zero. /// \returns A 128-bit vector of [4 x float] containing the loaded values. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_maskload_ps(float const *__p, __m128i __m) { return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m); @@ -3449,7 +3450,7 @@ /// changed. /// \param __a /// A 128-bit vector of [2 x double] containing the values to be stored. -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS128 _mm_maskstore_pd(double *__p, __m128i __m, __m128d __a) { __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a); @@ -3497,7 +3498,7 @@ /// changed. /// \param __a /// A 128-bit vector of [4 x float] containing the values to be stored. -static __inline void __DEFAULT_FN_ATTRS +static __inline void __DEFAULT_FN_ATTRS128 _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a) { __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a); @@ -5057,5 +5058,6 @@ } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 #endif /* __AVXINTRIN_H */ Index: lib/Headers/emmintrin.h =================================================================== --- lib/Headers/emmintrin.h +++ lib/Headers/emmintrin.h @@ -45,8 +45,8 @@ typedef signed char __v16qs __attribute__((__vector_size__(16))); /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) -#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), __min_vector_width__(64))) /// Adds lower double-precision values in both operands and returns the /// sum in the lower 64 bits of the result. The upper 64 bits of the result @@ -4093,7 +4093,7 @@ /// A pointer to the 32-bit memory location used to store the value. /// \param __a /// A 32-bit integer containing the value to be stored. -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) _mm_stream_si32(int *__p, int __a) { __builtin_ia32_movnti(__p, __a); @@ -4113,7 +4113,7 @@ /// A pointer to the 64-bit memory location used to store the value. /// \param __a /// A 64-bit integer containing the value to be stored. -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2"))) _mm_stream_si64(long long *__p, long long __a) { __builtin_ia32_movnti64(__p, __a); Index: lib/Headers/f16cintrin.h =================================================================== --- lib/Headers/f16cintrin.h +++ lib/Headers/f16cintrin.h @@ -29,8 +29,10 @@ #define __F16CINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) +#define __DEFAULT_FN_ATTRS128 \ + __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 \ + __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256))) /* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h, * but that's because icc can emulate these without f16c using a library call. @@ -47,7 +49,7 @@ /// \param __a /// A 16-bit half-precision float value. /// \returns The converted 32-bit float value. -static __inline float __DEFAULT_FN_ATTRS +static __inline float __DEFAULT_FN_ATTRS128 _cvtsh_ss(unsigned short __a) { __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0}; @@ -118,7 +120,7 @@ /// A 128-bit vector containing 16-bit half-precision float values. The lower /// 64 bits are used in the conversion. /// \returns A 128-bit vector of [4 x float] containing converted float values. -static __inline __m128 __DEFAULT_FN_ATTRS +static __inline __m128 __DEFAULT_FN_ATTRS128 _mm_cvtph_ps(__m128i __a) { return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a); @@ -162,12 +164,13 @@ /// converted to 32-bit single-precision float values. /// \returns A vector of [8 x float] containing the converted 32-bit /// single-precision float values. -static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c"))) +static __inline __m256 __DEFAULT_FN_ATTRS256 _mm256_cvtph_ps(__m128i __a) { return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __F16CINTRIN_H */ Index: lib/Headers/fma4intrin.h =================================================================== --- lib/Headers/fma4intrin.h +++ lib/Headers/fma4intrin.h @@ -31,200 +31,202 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256))) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __FMA4INTRIN_H */ Index: lib/Headers/fmaintrin.h =================================================================== --- lib/Headers/fmaintrin.h +++ lib/Headers/fmaintrin.h @@ -29,200 +29,202 @@ #define __FMAINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma"))) +#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256))) -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C); } -static __inline__ __m128 __DEFAULT_FN_ATTRS +static __inline__ __m128 __DEFAULT_FN_ATTRS128 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C) { return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C); } -static __inline__ __m128d __DEFAULT_FN_ATTRS +static __inline__ __m128d __DEFAULT_FN_ATTRS128 _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C) { return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C) { return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C) { return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C); } -#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS128 +#undef __DEFAULT_FN_ATTRS256 #endif /* __FMAINTRIN_H */ Index: lib/Headers/gfniintrin.h =================================================================== --- lib/Headers/gfniintrin.h +++ lib/Headers/gfniintrin.h @@ -120,16 +120,17 @@ U, A, B, I) /* Default attributes for simple form (no masking). */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128))) /* Default attributes for YMM unmasked form. */ -#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"))) +#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256))) /* Default attributes for ZMM forms. */ -#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"))) +#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512))) /* Default attributes for VLX forms. */ -#define __DEFAULT_FN_ATTRS_VL __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"))) +#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_gf2p8mul_epi8(__m128i __A, __m128i __B) @@ -138,7 +139,7 @@ (__v16qi) __B); } -static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 _mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) { return (__m128i) __builtin_ia32_selectb_128(__U, @@ -146,7 +147,7 @@ (__v16qi) __S); } -static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128 _mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B) { return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(), @@ -160,7 +161,7 @@ (__v32qi) __B); } -static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 _mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B) { return (__m256i) __builtin_ia32_selectb_256(__U, @@ -168,7 +169,7 @@ (__v32qi) __S); } -static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256 _mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B) { return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(), @@ -200,7 +201,8 @@ #undef __DEFAULT_FN_ATTRS #undef __DEFAULT_FN_ATTRS_Y #undef __DEFAULT_FN_ATTRS_Z -#undef __DEFAULT_FN_ATTRS_VL +#undef __DEFAULT_FN_ATTRS_VL128 +#undef __DEFAULT_FN_ATTRS_VL256 #endif /* __GFNIINTRIN_H */ Index: lib/Headers/mm3dnow.h =================================================================== --- lib/Headers/mm3dnow.h +++ lib/Headers/mm3dnow.h @@ -30,9 +30,9 @@ typedef float __v2sf __attribute__((__vector_size__(8))); /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64))) -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("3dnow"))) _m_femms(void) { __builtin_ia32_femms(); } @@ -134,7 +134,7 @@ /* Handle the 3dnowa instructions here. */ #undef __DEFAULT_FN_ATTRS -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64))) static __inline__ __m64 __DEFAULT_FN_ATTRS _m_pf2iw(__m64 __m) { Index: lib/Headers/mmintrin.h =================================================================== --- lib/Headers/mmintrin.h +++ lib/Headers/mmintrin.h @@ -32,7 +32,7 @@ typedef char __v8qi __attribute__((__vector_size__(8))); /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64))) /// Clears the MMX state by setting the state of the x87 stack registers /// to empty. @@ -41,7 +41,7 @@ /// /// This intrinsic corresponds to the EMMS instruction. /// -static __inline__ void __DEFAULT_FN_ATTRS +static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("mmx"))) _mm_empty(void) { __builtin_ia32_emms(); Index: lib/Headers/pmmintrin.h =================================================================== --- lib/Headers/pmmintrin.h +++ lib/Headers/pmmintrin.h @@ -28,7 +28,7 @@ /* Define the default attributes for the functions in this file. */ #define __DEFAULT_FN_ATTRS \ - __attribute__((__always_inline__, __nodebug__, __target__("sse3"))) + __attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128))) /// Loads data from an unaligned memory location to elements in a 128-bit /// vector. Index: lib/Headers/shaintrin.h =================================================================== --- lib/Headers/shaintrin.h +++ lib/Headers/shaintrin.h @@ -29,7 +29,7 @@ #define __SHAINTRIN_H /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"), __min_vector_width__(128))) #define _mm_sha1rnds4_epu32(V1, V2, M) \ __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M)) Index: lib/Headers/smmintrin.h =================================================================== --- lib/Headers/smmintrin.h +++ lib/Headers/smmintrin.h @@ -27,7 +27,7 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), __min_vector_width__(128))) /* SSE4 Rounding macros. */ #define _MM_FROUND_TO_NEAREST_INT 0x00 Index: lib/Headers/tmmintrin.h =================================================================== --- lib/Headers/tmmintrin.h +++ lib/Headers/tmmintrin.h @@ -27,8 +27,8 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"))) -#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64))) +#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64))) /// Computes the absolute value of each of the packed 8-bit signed /// integers in the source operand and stores the 8-bit unsigned integer Index: lib/Headers/vaesintrin.h =================================================================== --- lib/Headers/vaesintrin.h +++ lib/Headers/vaesintrin.h @@ -29,10 +29,10 @@ #define __VAESINTRIN_H /* Default attributes for YMM forms. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"), __min_vector_width__(256))) /* Default attributes for ZMM forms. */ -#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"))) +#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"), __min_vector_width__(512))) static __inline__ __m256i __DEFAULT_FN_ATTRS Index: lib/Headers/xmmintrin.h =================================================================== --- lib/Headers/xmmintrin.h +++ lib/Headers/xmmintrin.h @@ -40,8 +40,8 @@ #endif /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse"))) -#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse"), __min_vector_width__(64))) /// Adds the 32-bit float values in the low-order bits of the operands. /// Index: lib/Headers/xopintrin.h =================================================================== --- lib/Headers/xopintrin.h +++ lib/Headers/xopintrin.h @@ -31,7 +31,8 @@ #include /* Define the default attributes for the functions in this file. */ -#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"))) +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128))) +#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256))) static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C) @@ -201,7 +202,7 @@ return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C)); } -static __inline__ __m256i __DEFAULT_FN_ATTRS +static __inline__ __m256i __DEFAULT_FN_ATTRS256 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C) { return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C)); @@ -765,18 +766,19 @@ return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A); } -static __inline__ __m256 __DEFAULT_FN_ATTRS +static __inline__ __m256 __DEFAULT_FN_ATTRS256 _mm256_frcz_ps(__m256 __A) { return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A); } -static __inline__ __m256d __DEFAULT_FN_ATTRS +static __inline__ __m256d __DEFAULT_FN_ATTRS256 _mm256_frcz_pd(__m256d __A) { return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A); } #undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS256 #endif /* __XOPINTRIN_H */ Index: lib/Sema/SemaDeclAttr.cpp =================================================================== --- lib/Sema/SemaDeclAttr.cpp +++ lib/Sema/SemaDeclAttr.cpp @@ -2942,6 +2942,25 @@ D->addAttr(NewAttr); } +static void handleMinVectorWidthAttr(Sema &S, Decl *D, const AttributeList &AL) { + Expr *E = AL.getArgAsExpr(0); + uint32_t VecWidth; + if (!checkUInt32Argument(S, AL, E, VecWidth)) { + AL.setInvalid(); + return; + } + + MinVectorWidthAttr *Existing = D->getAttr(); + if (Existing && Existing->getVectorWidth() != VecWidth) { + S.Diag(AL.getLoc(), diag::warn_duplicate_attribute) << AL.getName(); + return; + } + + D->addAttr(::new (S.Context) + MinVectorWidthAttr(AL.getRange(), S.Context, VecWidth, + AL.getAttributeSpellingListIndex())); +} + static void handleCleanupAttr(Sema &S, Decl *D, const AttributeList &AL) { Expr *E = AL.getArgAsExpr(0); SourceLocation Loc = E->getExprLoc(); @@ -6130,6 +6149,9 @@ case AttributeList::AT_Target: handleTargetAttr(S, D, AL); break; + case AttributeList::AT_MinVectorWidth: + handleMinVectorWidthAttr(S, D, AL); + break; case AttributeList::AT_Unavailable: handleAttrWithMessage(S, D, AL); break; Index: test/CodeGen/function-min-vector-width.c =================================================================== --- /dev/null +++ test/CodeGen/function-min-vector-width.c @@ -0,0 +1,7 @@ +// This test verifies that we produce min-legal-vector-width attributes + +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s + +void __attribute((__min_vector_width__(128))) foo() {} + +// CHECK: "min-legal-vector-width"="128" Index: test/CodeGen/x86-builtins-vector-width.c =================================================================== --- /dev/null +++ test/CodeGen/x86-builtins-vector-width.c @@ -0,0 +1,32 @@ +// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -emit-llvm %s -o - | FileCheck %s + +typedef signed long long V2LLi __attribute__((vector_size(16))); +typedef signed long long V4LLi __attribute__((vector_size(32))); + +// Make sure builtin forces a min-legal-width attribute +void foo(void) { + V2LLi tmp_V2LLi; + + tmp_V2LLi = __builtin_ia32_undef128(); +} + +// Make sure explicit attribute larger than builtin wins. +void goo(void) __attribute__((__min_vector_width__(256))) { + V2LLi tmp_V2LLi; + + tmp_V2LLi = __builtin_ia32_undef128(); +} + +// Make sure builtin larger than explicit attribute wins. +void hoo(void) __attribute__((__min_vector_width__(128))) { + V4LLi tmp_V4LLi; + + tmp_V4LLi = __builtin_ia32_undef256(); +} + +// CHECK: foo{{.*}} #0 +// CHECK: goo{{.*}} #1 +// CHECK: hoo{{.*}} #1 + +// CHECK: #0 = {{.*}}"min-legal-vector-width"="128" +// CHECK: #1 = {{.*}}"min-legal-vector-width"="256" Index: test/Sema/attr-min-vector-width.c =================================================================== --- /dev/null +++ test/Sema/attr-min-vector-width.c @@ -0,0 +1,12 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s + +int i; +void f(void) __attribute__((__min_vector_width__(i))); /* expected-error {{'__min_vector_width__' attribute requires an integer constant}} */ + +void f2(void) __attribute__((__min_vector_width__(128))); + +void f3(void) __attribute__((__min_vector_width__(128), __min_vector_width__(256))); /* expected-warning {{attribute '__min_vector_width__' is already applied with different parameters}} */ + +void f4(void) __attribute__((__min_vector_width__())); /* expected-error {{'__min_vector_width__' attribute takes one argument}} */ + +void f5(void) __attribute__((__min_vector_width__(128, 256))); /* expected-error {{'__min_vector_width__' attribute takes one argument}} */