Index: include/clang/Basic/Attr.td
===================================================================
--- include/clang/Basic/Attr.td
+++ include/clang/Basic/Attr.td
@@ -1943,6 +1943,13 @@
   }];
 }
 
+def MinVectorWidth : InheritableAttr {
+  let Spellings = [Clang<"min_vector_width">];
+  let Args = [UnsignedArgument<"VectorWidth">];
+  let Subjects = SubjectList<[Function], ErrorDiag>;
+  let Documentation = [MinVectorWidthDocs];
+}
+
 def TransparentUnion : InheritableAttr {
   let Spellings = [GCC<"transparent_union">];
 //  let Subjects = SubjectList<[Record, TypedefName]>;
Index: include/clang/Basic/AttrDocs.td
===================================================================
--- include/clang/Basic/AttrDocs.td
+++ include/clang/Basic/AttrDocs.td
@@ -1496,6 +1496,29 @@
 }];
 }
 
+def MinVectorWidthDocs : Documentation {
+  let Category = DocCatFunction;
+  let Content = [{
+Clang supports the ``__attribute__((min_vector_width(width)))`` attribute. This
+attribute may be attached to a function and informs the backend that this
+function desires vectors of at least this width to be generated. Target-specific
+maximum vector widths still apply. This means even if you ask for something
+larger than the target supports, you will only get what the target supports.
+This attribute is meant to be a hint to control target heuristics that may
+generate narrower vectors than what the target hardware supports.
+
+This is currently used by the X86 target to allow some CPUs that support 512-bit
+vectors to be limited to using 256-bit vectors to avoid frequency penalties.
+This is currently enabled with the -prefer-vector-width=256 command line option.
+The min_vector_width attribute can be used to prevent the backend from trying
+to split vector operations to match the prefer-vector-width. All X86 vector
+intrinsics from x86intrin.h already set this attribute. Additionally, use of
+any of the X86-specific vector builtins will implicitly set this attribute on
+the calling function. The intent is that explicitly writing vector code using
+the X86 intrinsics will prevent prefer-vector-width from affecting the code.
+}];
+}
+
 def DocCatAMDGPUAttributes : DocumentationCategory<"AMD GPU Attributes">;
 
 def AMDGPUFlatWorkGroupSizeDocs : Documentation {
Index: include/clang/Basic/Builtins.h
===================================================================
--- include/clang/Basic/Builtins.h
+++ include/clang/Basic/Builtins.h
@@ -206,6 +206,8 @@
     return getRecord(ID).Features;
   }
 
+  unsigned getRequiredVectorWidth(unsigned ID) const;
+
   /// Return true if builtin ID belongs to AuxTarget.
   bool isAuxBuiltinID(unsigned ID) const {
     return ID >= (Builtin::FirstTSBuiltin + TSRecords.size());
Index: include/clang/Basic/Builtins.def
===================================================================
--- include/clang/Basic/Builtins.def
+++ include/clang/Basic/Builtins.def
@@ -92,6 +92,7 @@
 //  e -> const, but only when -fno-math-errno
 //  j -> returns_twice (like setjmp)
 //  u -> arguments are not evaluated for their side-effects
+//  V:N: -> requires vectors of at least N bits to be legal
 //  FIXME: gcc has nonnull
 
 #if defined(BUILTIN) && !defined(LIBBUILTIN)
Index: include/clang/Basic/BuiltinsX86.def
===================================================================
--- include/clang/Basic/BuiltinsX86.def
+++ include/clang/Basic/BuiltinsX86.def
@@ -36,9 +36,9 @@
 
 // Undefined Values
 //
-TARGET_BUILTIN(__builtin_ia32_undef128, "V2d", "nc", "")
-TARGET_BUILTIN(__builtin_ia32_undef256, "V4d", "nc", "")
-TARGET_BUILTIN(__builtin_ia32_undef512, "V8d", "nc", "")
+TARGET_BUILTIN(__builtin_ia32_undef128, "V2d", "ncV:128:", "")
+TARGET_BUILTIN(__builtin_ia32_undef256, "V4d", "ncV:256:", "")
+TARGET_BUILTIN(__builtin_ia32_undef512, "V8d", "ncV:512:", "")
 
 // FLAGS
 //
@@ -48,32 +48,32 @@
 // 3DNow!
 //
 TARGET_BUILTIN(__builtin_ia32_femms, "v", "n", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pavgusb, "V8cV8cV8c", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pf2id, "V2iV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfacc, "V2fV2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfadd, "V2fV2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfcmpeq, "V2iV2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfcmpge, "V2iV2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfcmpgt, "V2iV2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfmax, "V2fV2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfmin, "V2fV2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfmul, "V2fV2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfrcp, "V2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfrcpit1, "V2fV2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfrcpit2, "V2fV2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfrsqrt, "V2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfrsqit1, "V2fV2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfsub, "V2fV2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pfsubr, "V2fV2fV2f", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pi2fd, "V2fV2i", "nc", "3dnow")
-TARGET_BUILTIN(__builtin_ia32_pmulhrw, "V4sV4sV4s", "nc", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pavgusb, "V8cV8cV8c", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pf2id, "V2iV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfacc, "V2fV2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfadd, "V2fV2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfcmpeq, "V2iV2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfcmpge, "V2iV2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfcmpgt, "V2iV2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfmax, "V2fV2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfmin, "V2fV2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfmul, "V2fV2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfrcp, "V2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfrcpit1, "V2fV2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfrcpit2, "V2fV2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfrsqrt, "V2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfrsqit1, "V2fV2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfsub, "V2fV2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pfsubr, "V2fV2fV2f", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pi2fd, "V2fV2i", "ncV:64:", "3dnow")
+TARGET_BUILTIN(__builtin_ia32_pmulhrw, "V4sV4sV4s", "ncV:64:", "3dnow")
 // 3DNow! Extensions (3dnowa).
-TARGET_BUILTIN(__builtin_ia32_pf2iw, "V2iV2f", "nc", "3dnowa")
-TARGET_BUILTIN(__builtin_ia32_pfnacc, "V2fV2fV2f", "nc", "3dnowa")
-TARGET_BUILTIN(__builtin_ia32_pfpnacc, "V2fV2fV2f", "nc", "3dnowa")
-TARGET_BUILTIN(__builtin_ia32_pi2fw, "V2fV2i", "nc", "3dnowa")
-TARGET_BUILTIN(__builtin_ia32_pswapdsf, "V2fV2f", "nc", "3dnowa")
-TARGET_BUILTIN(__builtin_ia32_pswapdsi, "V2iV2i", "nc", "3dnowa")
+TARGET_BUILTIN(__builtin_ia32_pf2iw, "V2iV2f", "ncV:64:", "3dnowa")
+TARGET_BUILTIN(__builtin_ia32_pfnacc, "V2fV2fV2f", "ncV:64:", "3dnowa")
+TARGET_BUILTIN(__builtin_ia32_pfpnacc, "V2fV2fV2f", "ncV:64:", "3dnowa")
+TARGET_BUILTIN(__builtin_ia32_pi2fw, "V2fV2i", "ncV:64:", "3dnowa")
+TARGET_BUILTIN(__builtin_ia32_pswapdsf, "V2fV2f", "ncV:64:", "3dnowa")
+TARGET_BUILTIN(__builtin_ia32_pswapdsi, "V2iV2i", "ncV:64:", "3dnowa")
 
 // MMX
 //
@@ -85,258 +85,258 @@
 // doesn't work in the presence of re-declaration of _mm_prefetch for windows.
 TARGET_BUILTIN(_mm_prefetch, "vcC*i", "nc", "mmx")
 TARGET_BUILTIN(__builtin_ia32_emms, "v", "n", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddb, "V8cV8cV8c", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddw, "V4sV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddd, "V2iV2iV2i", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddsb, "V8cV8cV8c", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddsw, "V4sV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddusb, "V8cV8cV8c", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_paddusw, "V4sV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubb, "V8cV8cV8c", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubw, "V4sV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubd, "V2iV2iV2i", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubsb, "V8cV8cV8c", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubsw, "V4sV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubusb, "V8cV8cV8c", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psubusw, "V4sV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pmulhw, "V4sV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pmullw, "V4sV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pmaddwd, "V2iV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pand, "V1LLiV1LLiV1LLi", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pandn, "V1LLiV1LLiV1LLi", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_por, "V1LLiV1LLiV1LLi", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pxor, "V1LLiV1LLiV1LLi", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psllw, "V4sV4sV1LLi", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pslld, "V2iV2iV1LLi", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psllq, "V1LLiV1LLiV1LLi", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrlw, "V4sV4sV1LLi", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrld, "V2iV2iV1LLi", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrlq, "V1LLiV1LLiV1LLi", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psraw, "V4sV4sV1LLi", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrad, "V2iV2iV1LLi", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psllwi, "V4sV4si", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pslldi, "V2iV2ii", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psllqi, "V1LLiV1LLii", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrlwi, "V4sV4si", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrldi, "V2iV2ii", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrlqi, "V1LLiV1LLii", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psrawi, "V4sV4si", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_psradi, "V2iV2ii", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_packsswb, "V8cV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_packssdw, "V4sV2iV2i", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_packuswb, "V8cV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_punpckhbw, "V8cV8cV8c", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_punpckhwd, "V4sV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_punpckhdq, "V2iV2iV2i", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_punpcklbw, "V8cV8cV8c", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_punpcklwd, "V4sV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_punpckldq, "V2iV2iV2i", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqb, "V8cV8cV8c", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqw, "V4sV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pcmpeqd, "V2iV2iV2i", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtb, "V8cV8cV8c", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtw, "V4sV4sV4s", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_pcmpgtd, "V2iV2iV2i", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_maskmovq, "vV8cV8cc*", "n", "mmx")
-TARGET_BUILTIN(__builtin_ia32_movntq, "vV1LLi*V1LLi", "n", "mmx")
-TARGET_BUILTIN(__builtin_ia32_vec_init_v2si, "V2iii", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_vec_init_v4hi, "V4sssss", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_vec_init_v8qi, "V8ccccccccc", "nc", "mmx")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v2si, "iV2iIi", "nc", "mmx")
+TARGET_BUILTIN(__builtin_ia32_paddb, "V8cV8cV8c", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_paddw, "V4sV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_paddd, "V2iV2iV2i", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_paddsb, "V8cV8cV8c", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_paddsw, "V4sV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_paddusb, "V8cV8cV8c", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_paddusw, "V4sV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psubb, "V8cV8cV8c", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psubw, "V4sV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psubd, "V2iV2iV2i", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psubsb, "V8cV8cV8c", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psubsw, "V4sV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psubusb, "V8cV8cV8c", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psubusw, "V4sV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pmulhw, "V4sV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pmullw, "V4sV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pmaddwd, "V2iV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pand, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pandn, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_por, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pxor, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psllw, "V4sV4sV1LLi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pslld, "V2iV2iV1LLi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psllq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psrlw, "V4sV4sV1LLi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psrld, "V2iV2iV1LLi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psrlq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psraw, "V4sV4sV1LLi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psrad, "V2iV2iV1LLi", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psllwi, "V4sV4si", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pslldi, "V2iV2ii", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psllqi, "V1LLiV1LLii", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psrlwi, "V4sV4si", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psrldi, "V2iV2ii", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psrlqi, "V1LLiV1LLii", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psrawi, "V4sV4si", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_psradi, "V2iV2ii", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_packsswb, "V8cV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_packssdw, "V4sV2iV2i", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_packuswb, "V8cV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_punpckhbw, "V8cV8cV8c", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_punpckhwd, "V4sV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_punpckhdq, "V2iV2iV2i", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_punpcklbw, "V8cV8cV8c", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_punpcklwd, "V4sV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_punpckldq, "V2iV2iV2i", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pcmpeqb, "V8cV8cV8c", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pcmpeqw, "V4sV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pcmpeqd, "V2iV2iV2i", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pcmpgtb, "V8cV8cV8c", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pcmpgtw, "V4sV4sV4s", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_pcmpgtd, "V2iV2iV2i", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_maskmovq, "vV8cV8cc*", "nV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_movntq, "vV1LLi*V1LLi", "nV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_vec_init_v2si, "V2iii", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_vec_init_v4hi, "V4sssss", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_vec_init_v8qi, "V8ccccccccc", "ncV:64:", "mmx")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v2si, "iV2ii", "ncV:64:", "mmx")
 
 // MMX2 (MMX+SSE) intrinsics
-TARGET_BUILTIN(__builtin_ia32_cvtpi2ps, "V4fV4fV2i", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pi, "V2iV4f", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_cvttps2pi, "V2iV4f", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pavgb, "V8cV8cV8c", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pavgw, "V4sV4sV4s", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pmaxsw, "V4sV4sV4s", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pmaxub, "V8cV8cV8c", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pminsw, "V4sV4sV4s", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pminub, "V8cV8cV8c", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_pshufw, "V4sV4sIc", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "iV4sIi", "nc", "mmx,sse")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "nc", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_cvtpi2ps, "V4fV4fV2i", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_cvtps2pi, "V2iV4f", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_cvttps2pi, "V2iV4f", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pavgb, "V8cV8cV8c", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pavgw, "V4sV4sV4s", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pmaxsw, "V4sV4sV4s", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pmaxub, "V8cV8cV8c", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pminsw, "V4sV4sV4s", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pminub, "V8cV8cV8c", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pmovmskb, "iV8c", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pmulhuw, "V4sV4sV4s", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_psadbw, "V4sV8cV8c", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_pshufw, "V4sV4sIc", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4hi, "iV4sIi", "ncV:64:", "mmx,sse")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v4hi, "V4sV4siIi", "ncV:64:", "mmx,sse")
 
 // MMX+SSE2
-TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "nc", "mmx,sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtpi2pd, "V2dV2i", "nc", "mmx,sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2pi, "V2iV2d", "nc", "mmx,sse2")
-TARGET_BUILTIN(__builtin_ia32_paddq, "V1LLiV1LLiV1LLi", "nc", "mmx,sse2")
-TARGET_BUILTIN(__builtin_ia32_pmuludq, "V1LLiV2iV2i", "nc", "mmx,sse2")
-TARGET_BUILTIN(__builtin_ia32_psubq, "V1LLiV1LLiV1LLi", "nc", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2pi, "V2iV2d", "ncV:64:", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_cvtpi2pd, "V2dV2i", "ncV:64:", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2pi, "V2iV2d", "ncV:64:", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_paddq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_pmuludq, "V1LLiV2iV2i", "ncV:64:", "mmx,sse2")
+TARGET_BUILTIN(__builtin_ia32_psubq, "V1LLiV1LLiV1LLi", "ncV:64:", "mmx,sse2")
 
 // MMX+SSSE3
-TARGET_BUILTIN(__builtin_ia32_pabsb, "V8cV8c", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsd, "V2iV2i", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsw, "V4sV4s", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_palignr, "V8cV8cV8cIc", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_phaddd, "V2iV2iV2i", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_phaddsw, "V4sV4sV4s", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_phaddw, "V4sV4sV4s", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_phsubd, "V2iV2iV2i", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_phsubsw, "V4sV4sV4s", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_phsubw, "V4sV4sV4s", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_pmaddubsw, "V8cV8cV8c", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_pmulhrsw, "V4sV4sV4s", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_pshufb, "V8cV8cV8c", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_psignw, "V4sV4sV4s", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_psignb, "V8cV8cV8c", "nc", "mmx,ssse3")
-TARGET_BUILTIN(__builtin_ia32_psignd, "V2iV2iV2i", "nc", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_pabsb, "V8cV8c", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_pabsd, "V2iV2i", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_pabsw, "V4sV4s", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_palignr, "V8cV8cV8cIc", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_phaddd, "V2iV2iV2i", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_phaddsw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_phaddw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_phsubd, "V2iV2iV2i", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_phsubsw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_phsubw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_pmaddubsw, "V8cV8cV8c", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_pmulhrsw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_pshufb, "V8cV8cV8c", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_psignw, "V4sV4sV4s", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_psignb, "V8cV8cV8c", "ncV:64:", "mmx,ssse3")
+TARGET_BUILTIN(__builtin_ia32_psignd, "V2iV2iV2i", "ncV:64:", "mmx,ssse3")
 
 // SSE intrinsics.
-TARGET_BUILTIN(__builtin_ia32_comieq, "iV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_comilt, "iV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_comile, "iV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_comigt, "iV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_comige, "iV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_comineq, "iV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_ucomieq, "iV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_ucomilt, "iV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_ucomile, "iV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_ucomigt, "iV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_ucomige, "iV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_ucomineq, "iV4fV4f", "nc", "sse")
-
-TARGET_BUILTIN(__builtin_ia32_comisdeq, "iV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_comisdlt, "iV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_comisdle, "iV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_comisdgt, "iV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_comisdge, "iV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_comisdneq, "iV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_ucomisdeq, "iV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_ucomisdlt, "iV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_ucomisdle, "iV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_ucomisdgt, "iV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_ucomisdge, "iV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_ucomisdneq, "iV2dV2d", "nc", "sse2")
-
-TARGET_BUILTIN(__builtin_ia32_cmpeqps, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpltps, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpleps, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpunordps, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpneqps, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpnltps, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpnleps, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpordps, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpeqss, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpltss, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpless, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpunordss, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpneqss, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpnltss, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpnless, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cmpordss, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "nc", "sse")
-
-TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmplepd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpunordpd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpneqpd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpnltpd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpnlepd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpordpd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpeqsd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpltsd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmplesd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpunordsd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_maxsd, "V2dV2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_paddsb128, "V16cV16cV16c", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_paddsw128, "V8sV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psubsb128, "V16cV16cV16c", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psubsw128, "V8sV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_paddusb128, "V16cV16cV16c", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_paddusw128, "V8sV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psubusb128, "V16cV16cV16c", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pminsw128, "V8sV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmulhuw128, "V8sV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v4si, "iV4iIi", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v4sf, "fV4fIi", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v8hi, "sV8sIi", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v8hi, "V8sV8ssIi", "nc", "sse2")
-
-TARGET_BUILTIN(__builtin_ia32_addsubps, "V4fV4fV4f", "nc", "sse3")
-TARGET_BUILTIN(__builtin_ia32_addsubpd, "V2dV2dV2d", "nc", "sse3")
-TARGET_BUILTIN(__builtin_ia32_haddps, "V4fV4fV4f", "nc", "sse3")
-TARGET_BUILTIN(__builtin_ia32_haddpd, "V2dV2dV2d", "nc", "sse3")
-TARGET_BUILTIN(__builtin_ia32_hsubps, "V4fV4fV4f", "nc", "sse3")
-TARGET_BUILTIN(__builtin_ia32_hsubpd, "V2dV2dV2d", "nc", "sse3")
-TARGET_BUILTIN(__builtin_ia32_phaddw128, "V8sV8sV8s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_phaddd128, "V4iV4iV4i", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_phaddsw128, "V8sV8sV8s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_phsubw128, "V8sV8sV8s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_phsubd128, "V4iV4iV4i", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_phsubsw128, "V8sV8sV8s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pmaddubsw128, "V8sV16cV16c", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pmulhrsw128, "V8sV8sV8s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "nc", "ssse3")
-TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "nc", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_comieq, "iV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_comilt, "iV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_comile, "iV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_comigt, "iV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_comige, "iV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_comineq, "iV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_ucomieq, "iV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_ucomilt, "iV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_ucomile, "iV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_ucomigt, "iV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_ucomige, "iV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_ucomineq, "iV4fV4f", "ncV:128:", "sse")
+
+TARGET_BUILTIN(__builtin_ia32_comisdeq, "iV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_comisdlt, "iV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_comisdle, "iV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_comisdgt, "iV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_comisdge, "iV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_comisdneq, "iV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_ucomisdeq, "iV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_ucomisdlt, "iV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_ucomisdle, "iV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_ucomisdgt, "iV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_ucomisdge, "iV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_ucomisdneq, "iV2dV2d", "ncV:128:", "sse2")
+
+TARGET_BUILTIN(__builtin_ia32_cmpeqps, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpltps, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpleps, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpunordps, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpneqps, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpnltps, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpnleps, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpordps, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpeqss, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpltss, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpless, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpunordss, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpneqss, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpnltss, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpnless, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cmpordss, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_minps, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_maxps, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_minss, "V4fV4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_maxss, "V4fV4fV4f", "ncV:128:", "sse")
+
+TARGET_BUILTIN(__builtin_ia32_cmpeqpd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpltpd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmplepd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpunordpd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpneqpd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpnltpd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpnlepd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpordpd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpeqsd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpltsd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmplesd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpunordsd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpneqsd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpnltsd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpnlesd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cmpordsd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_minpd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_maxpd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_minsd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_maxsd, "V2dV2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_paddsb128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_paddsw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psubsb128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psubsw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_paddusb128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_paddusw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psubusb128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psubusw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pmulhw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pmaxub128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pmaxsw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pminub128, "V16cV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pminsw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_packsswb128, "V16cV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_packssdw128, "V8sV4iV4i", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_packuswb128, "V16cV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pmulhuw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4si, "iV4iIi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4sf, "fV4fIi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v8hi, "sV8sIi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v8hi, "V8sV8ssIi", "ncV:128:", "sse2")
+
+TARGET_BUILTIN(__builtin_ia32_addsubps, "V4fV4fV4f", "ncV:128:", "sse3")
+TARGET_BUILTIN(__builtin_ia32_addsubpd, "V2dV2dV2d", "ncV:128:", "sse3")
+TARGET_BUILTIN(__builtin_ia32_haddps, "V4fV4fV4f", "ncV:128:", "sse3")
+TARGET_BUILTIN(__builtin_ia32_haddpd, "V2dV2dV2d", "ncV:128:", "sse3")
+TARGET_BUILTIN(__builtin_ia32_hsubps, "V4fV4fV4f", "ncV:128:", "sse3")
+TARGET_BUILTIN(__builtin_ia32_hsubpd, "V2dV2dV2d", "ncV:128:", "sse3")
+TARGET_BUILTIN(__builtin_ia32_phaddw128, "V8sV8sV8s", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_phaddd128, "V4iV4iV4i", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_phaddsw128, "V8sV8sV8s", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_phsubw128, "V8sV8sV8s", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_phsubd128, "V4iV4iV4i", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_phsubsw128, "V8sV8sV8s", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_pmaddubsw128, "V8sV16cV16c", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_pmulhrsw128, "V8sV8sV8s", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_pshufb128, "V16cV16cV16c", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_psignb128, "V16cV16cV16c", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_psignw128, "V8sV8sV8s", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_psignd128, "V4iV4iV4i", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_pabsb128, "V16cV16c", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_pabsw128, "V8sV8s", "ncV:128:", "ssse3")
+TARGET_BUILTIN(__builtin_ia32_pabsd128, "V4iV4i", "ncV:128:", "ssse3")
 
 TARGET_BUILTIN(__builtin_ia32_ldmxcsr, "vUi", "n", "sse")
 TARGET_HEADER_BUILTIN(_mm_setcsr, "vUi", "nh","xmmintrin.h", ALL_LANGUAGES, "sse")
 TARGET_BUILTIN(__builtin_ia32_stmxcsr, "Ui", "n", "sse")
 TARGET_HEADER_BUILTIN(_mm_getcsr, "Ui", "nh", "xmmintrin.h", ALL_LANGUAGES, "sse")
-TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "n", "sse")
-TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "n", "sse")
-TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "n", "sse")
+TARGET_BUILTIN(__builtin_ia32_cvtss2si, "iV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cvttss2si, "iV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_storehps, "vV2i*V4f", "nV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_storelps, "vV2i*V4f", "nV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_movmskps, "iV4f", "nV:128:", "sse")
 TARGET_BUILTIN(__builtin_ia32_sfence, "v", "n", "sse")
 TARGET_HEADER_BUILTIN(_mm_sfence, "v", "nh", "xmmintrin.h", ALL_LANGUAGES, "sse")
-TARGET_BUILTIN(__builtin_ia32_rcpps, "V4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_rcpss, "V4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_rsqrtps, "V4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_rsqrtss, "V4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_shufps, "V4fV4fV4fIi", "nc", "sse")
-
-TARGET_BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "n", "sse2")
-TARGET_BUILTIN(__builtin_ia32_movmskpd, "iV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_rcpps, "V4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_rcpss, "V4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_rsqrtps, "V4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_rsqrtss, "V4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_shufps, "V4fV4fV4fIi", "ncV:128:", "sse")
+
+TARGET_BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "nV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_movmskpd, "iV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pmovmskb128, "iV16c", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_movnti, "vi*i", "n", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pshufd, "V4iV4iIi", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pshuflw, "V8sV8sIi", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pshufhw, "V8sV8sIi", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_shufpd, "V2dV2dV2dIi", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pshufd, "V4iV4iIi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pshuflw, "V8sV8sIi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pshufhw, "V8sV8sIi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_shufpd, "V2dV2dV2dIi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvtsd2si, "iV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvttsd2si, "iV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvtsd2ss, "V4fV4fV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvtps2dq, "V4iV4f", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvttps2dq, "V4iV4f", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_clflush, "vvC*", "n", "sse2")
 TARGET_HEADER_BUILTIN(_mm_clflush, "vvC*", "nh", "emmintrin.h", ALL_LANGUAGES, "sse2")
 TARGET_BUILTIN(__builtin_ia32_lfence, "v", "n", "sse2")
@@ -345,342 +345,342 @@
 TARGET_HEADER_BUILTIN(_mm_mfence, "v", "nh", "emmintrin.h", ALL_LANGUAGES, "sse2")
 TARGET_BUILTIN(__builtin_ia32_pause, "v", "n", "")
 TARGET_HEADER_BUILTIN(_mm_pause, "v", "nh", "emmintrin.h", ALL_LANGUAGES, "")
-TARGET_BUILTIN(__builtin_ia32_pmuludq128, "V2LLiV4iV4i", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psraw128, "V8sV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrad128, "V4iV4iV4i", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlw128, "V8sV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrld128, "V4iV4iV4i", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlq128, "V2LLiV2LLiV2LLi", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllw128, "V8sV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pslld128, "V4iV4iV4i", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllq128, "V2LLiV2LLiV2LLi", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllwi128, "V8sV8si", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pslldi128, "V4iV4ii", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psllqi128, "V2LLiV2LLii", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlwi128, "V8sV8si", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrldi128, "V4iV4ii", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrlqi128, "V2LLiV2LLii", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrawi128, "V8sV8si", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psradi128, "V4iV4ii", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pmaddwd128, "V4iV8sV8s", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_pslldqi128_byteshift, "V2LLiV2LLiIi", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_psrldqi128_byteshift, "V2LLiV2LLiIi", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pmuludq128, "V2LLiV4iV4i", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psraw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psrad128, "V4iV4iV4i", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psrlw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psrld128, "V4iV4iV4i", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psrlq128, "V2LLiV2LLiV2LLi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psllw128, "V8sV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pslld128, "V4iV4iV4i", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psllq128, "V2LLiV2LLiV2LLi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psllwi128, "V8sV8si", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pslldi128, "V4iV4ii", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psllqi128, "V2LLiV2LLii", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psrlwi128, "V8sV8si", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psrldi128, "V4iV4ii", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psrlqi128, "V2LLiV2LLii", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psrawi128, "V8sV8si", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psradi128, "V4iV4ii", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pmaddwd128, "V4iV8sV8s", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_pslldqi128_byteshift, "V2LLiV2LLiIi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_psrldqi128_byteshift, "V2LLiV2LLiIi", "ncV:128:", "sse2")
 
 TARGET_BUILTIN(__builtin_ia32_monitor, "vv*UiUi", "n", "sse3")
 TARGET_BUILTIN(__builtin_ia32_mwait, "vUiUi", "n", "sse3")
-TARGET_BUILTIN(__builtin_ia32_lddqu, "V16ccC*", "n", "sse3")
-
-TARGET_BUILTIN(__builtin_ia32_palignr128, "V16cV16cV16cIi", "nc", "ssse3")
-
-TARGET_BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fIc", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pblendw128, "V8sV8sV8sIi", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_blendpd, "V2dV2dV2dIi", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_blendps, "V4fV4fV4fIi", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "nc", "sse4.1")
-
-TARGET_BUILTIN(__builtin_ia32_pmaxsb128, "V16cV16cV16c", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmaxsd128, "V4iV4iV4i", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmaxud128, "V4iV4iV4i", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmaxuw128, "V8sV8sV8s", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pminsb128, "V16cV16cV16c", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2LLiV4iV4i", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_roundsd, "V2dV2dV2dIi", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_roundpd, "V2dV2dIi", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_dpps, "V4fV4fV4fIc", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_dppd, "V2dV2dV2dIc", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_ptestz128, "iV2LLiV2LLi", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16cIc", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_phminposuw128, "V8sV8s", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v16qi, "cV16cIi", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v16qi, "V16cV16ccIi", "nc", "sse4.1")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v4si, "V4iV4iiIi", "nc", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_lddqu, "V16ccC*", "nV:128:", "sse3")
+
+TARGET_BUILTIN(__builtin_ia32_palignr128, "V16cV16cV16cIi", "ncV:128:", "ssse3")
+
+TARGET_BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fIc", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pblendw128, "V8sV8sV8sIi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_blendpd, "V2dV2dV2dIi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_blendps, "V4fV4fV4fIi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "ncV:128:", "sse4.1")
+
+TARGET_BUILTIN(__builtin_ia32_pmaxsb128, "V16cV16cV16c", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pmaxsd128, "V4iV4iV4i", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pmaxud128, "V4iV4iV4i", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pmaxuw128, "V8sV8sV8s", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pminsb128, "V16cV16cV16c", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pminsd128, "V4iV4iV4i", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pminud128, "V4iV4iV4i", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pminuw128, "V8sV8sV8s", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pmuldq128, "V2LLiV4iV4i", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_roundps, "V4fV4fIi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_roundss, "V4fV4fV4fIi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_roundsd, "V2dV2dV2dIi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_roundpd, "V2dV2dIi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_dpps, "V4fV4fV4fIc", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_dppd, "V2dV2dV2dIc", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_ptestz128, "iV2LLiV2LLi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_ptestc128, "iV2LLiV2LLi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_ptestnzc128, "iV2LLiV2LLi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_mpsadbw128, "V16cV16cV16cIc", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_phminposuw128, "V8sV8s", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v16qi, "cV16cIi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v16qi, "V16cV16ccIi", "ncV:128:", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v4si, "V4iV4iiIi", "ncV:128:", "sse4.1")
 
 // SSE 4.2
-TARGET_BUILTIN(__builtin_ia32_pcmpistrm128, "V16cV16cV16cIc", "nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistri128, "iV16cV16cIc", "nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestrm128, "V16cV16ciV16ciIc", "nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestri128, "iV16ciV16ciIc","nc", "sse4.2")
-
-TARGET_BUILTIN(__builtin_ia32_pcmpistria128, "iV16cV16cIc","nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistric128, "iV16cV16cIc","nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistrio128, "iV16cV16cIc","nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistris128, "iV16cV16cIc","nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpistriz128, "iV16cV16cIc","nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestria128, "iV16ciV16ciIc","nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestric128, "iV16ciV16ciIc","nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestrio128, "iV16ciV16ciIc","nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","nc", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_pcmpistrm128, "V16cV16cV16cIc", "ncV:128:", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_pcmpistri128, "iV16cV16cIc", "ncV:128:", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_pcmpestrm128, "V16cV16ciV16ciIc", "ncV:128:", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_pcmpestri128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
+
+TARGET_BUILTIN(__builtin_ia32_pcmpistria128, "iV16cV16cIc","ncV:128:", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_pcmpistric128, "iV16cV16cIc","ncV:128:", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_pcmpistrio128, "iV16cV16cIc","ncV:128:", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_pcmpistris128, "iV16cV16cIc","ncV:128:", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_pcmpistriz128, "iV16cV16cIc","ncV:128:", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_pcmpestria128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_pcmpestric128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_pcmpestrio128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_pcmpestris128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
+TARGET_BUILTIN(__builtin_ia32_pcmpestriz128, "iV16ciV16ciIc","ncV:128:", "sse4.2")
 
 TARGET_BUILTIN(__builtin_ia32_crc32qi, "UiUiUc", "nc", "sse4.2")
 TARGET_BUILTIN(__builtin_ia32_crc32hi, "UiUiUs", "nc", "sse4.2")
 TARGET_BUILTIN(__builtin_ia32_crc32si, "UiUiUi", "nc", "sse4.2")
 
 // SSE4a
-TARGET_BUILTIN(__builtin_ia32_extrqi, "V2LLiV2LLiIcIc", "nc", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_extrq, "V2LLiV2LLiV16c", "nc", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_insertqi, "V2LLiV2LLiV2LLiIcIc", "nc", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_insertq, "V2LLiV2LLiV2LLi", "nc", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_movntsd, "vd*V2d", "n", "sse4a")
-TARGET_BUILTIN(__builtin_ia32_movntss, "vf*V4f", "n", "sse4a")
+TARGET_BUILTIN(__builtin_ia32_extrqi, "V2LLiV2LLiIcIc", "ncV:128:", "sse4a")
+TARGET_BUILTIN(__builtin_ia32_extrq, "V2LLiV2LLiV16c", "ncV:128:", "sse4a")
+TARGET_BUILTIN(__builtin_ia32_insertqi, "V2LLiV2LLiV2LLiIcIc", "ncV:128:", "sse4a")
+TARGET_BUILTIN(__builtin_ia32_insertq, "V2LLiV2LLiV2LLi", "ncV:128:", "sse4a")
+TARGET_BUILTIN(__builtin_ia32_movntsd, "vd*V2d", "nV:128:", "sse4a")
+TARGET_BUILTIN(__builtin_ia32_movntss, "vf*V4f", "nV:128:", "sse4a")
 
 // AES
-TARGET_BUILTIN(__builtin_ia32_aesenc128, "V2LLiV2LLiV2LLi", "nc", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesenclast128, "V2LLiV2LLiV2LLi", "nc", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesdec128, "V2LLiV2LLiV2LLi", "nc", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2LLiV2LLiV2LLi", "nc", "aes")
-TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "nc", "aes")
-TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLiIc", "nc", "aes")
+TARGET_BUILTIN(__builtin_ia32_aesenc128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes")
+TARGET_BUILTIN(__builtin_ia32_aesenclast128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes")
+TARGET_BUILTIN(__builtin_ia32_aesdec128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes")
+TARGET_BUILTIN(__builtin_ia32_aesdeclast128, "V2LLiV2LLiV2LLi", "ncV:128:", "aes")
+TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "ncV:128:", "aes")
+TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLiIc", "ncV:128:", "aes")
 
 // VAES
-TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4LLiV4LLiV4LLi", "nc", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8LLiV8LLiV8LLi", "nc", "avx512f,vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4LLiV4LLiV4LLi", "nc", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8LLiV8LLiV8LLi", "nc", "avx512f,vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4LLiV4LLiV4LLi", "nc", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8LLiV8LLiV8LLi", "nc", "avx512f,vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "nc", "vaes")
-TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "nc", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes")
+TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes")
+TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "ncV:256:", "vaes")
+TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f,vaes")
 
 // GFNI
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "nc", "gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "nc", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "nc", "avx512bw,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "nc", "gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "nc", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "nc", "avx512bw,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "nc", "gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "nc", "avx,gfni")
-TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "nc", "avx512bw,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512bw,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "ncV:128:", "gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "ncV:256:", "avx,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "ncV:512:", "avx512bw,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "ncV:128:", "gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "ncV:256:", "avx,gfni")
+TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "ncV:512:", "avx512bw,gfni")
 
 // CLMUL
-TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "nc", "pclmul")
+TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "ncV:128:", "pclmul")
 
 // VPCLMULQDQ
-TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4LLiV4LLiV4LLiIc", "nc", "vpclmulqdq")
-TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8LLiV8LLiV8LLiIc", "nc", "avx512f,vpclmulqdq")
+TARGET_BUILTIN(__builtin_ia32_pclmulqdq256, "V4LLiV4LLiV4LLiIc", "ncV:256:", "vpclmulqdq")
+TARGET_BUILTIN(__builtin_ia32_pclmulqdq512, "V8LLiV8LLiV8LLiIc", "ncV:512:", "avx512f,vpclmulqdq")
 
 // AVX
-TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_haddpd256, "V4dV4dV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_hsubps256, "V8fV8fV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_hsubpd256, "V4dV4dV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_haddps256, "V8fV8fV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_maxpd256, "V4dV4dV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_maxps256, "V8fV8fV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_minpd256, "V4dV4dV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_minps256, "V8fV8fV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarpd, "V2dV2dV2LLi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarps, "V4fV4fV4i", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4LLi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarps256, "V8fV8fV8i", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_blendpd256, "V4dV4dV4dIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilpd, "V2dV2dIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilps, "V4fV4fIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilpd256, "V4dV4dIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vpermilps256, "V8fV8fIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4iIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_roundpd256, "V4dV4dIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_roundps256, "V8fV8fIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestzpd, "iV2dV2d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestcpd, "iV2dV2d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestnzcpd, "iV2dV2d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestzps, "iV4fV4f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestcps, "iV4fV4f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestnzcps, "iV4fV4f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestzpd256, "iV4dV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestcpd256, "iV4dV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestnzcpd256, "iV4dV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestzps256, "iV8fV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestcps256, "iV8fV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vtestnzcps256, "iV8fV8f", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_ptestz256, "iV4LLiV4LLi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_ptestc256, "iV4LLiV4LLi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_ptestnzc256, "iV4LLiV4LLi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_movmskpd256, "iV4d", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_movmskps256, "iV8f", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_addsubpd256, "V4dV4dV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_addsubps256, "V8fV8fV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_haddpd256, "V4dV4dV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_hsubps256, "V8fV8fV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_hsubpd256, "V4dV4dV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_haddps256, "V8fV8fV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maxpd256, "V4dV4dV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maxps256, "V8fV8fV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_minpd256, "V4dV4dV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_minps256, "V8fV8fV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vpermilvarpd, "V2dV2dV2LLi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vpermilvarps, "V4fV4fV4i", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4LLi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vpermilvarps256, "V8fV8fV8i", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_blendpd256, "V4dV4dV4dIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cmpps, "V4fV4fV4fIc", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cmpps256, "V8fV8fV8fIc", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cmpsd, "V2dV2dV2dIc", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cmpss, "V4fV4fV4fIc", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vextractf128_pd256, "V2dV4dIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vextractf128_ps256, "V4fV8fIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vextractf128_si256, "V4iV8iIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2ps256, "V4fV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cvtps2dq256, "V8iV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2dq256, "V4iV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2dq256, "V4iV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_cvttps2dq256, "V8iV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vperm2f128_pd256, "V4dV4dV4dIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vperm2f128_ps256, "V8fV8fV8fIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vperm2f128_si256, "V8iV8iV8iIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vpermilpd, "V2dV2dIi", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vpermilps, "V4fV4fIi", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vpermilpd256, "V4dV4dIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vpermilps256, "V8fV8fIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vinsertf128_pd256, "V4dV4dV2dIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vinsertf128_ps256, "V8fV8fV4fIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vinsertf128_si256, "V8iV8iV4iIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_sqrtpd256, "V4dV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_sqrtps256, "V8fV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_rsqrtps256, "V8fV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_rcpps256, "V8fV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_roundpd256, "V4dV4dIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_roundps256, "V8fV8fIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vtestzpd, "iV2dV2d", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vtestcpd, "iV2dV2d", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vtestnzcpd, "iV2dV2d", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vtestzps, "iV4fV4f", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vtestcps, "iV4fV4f", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vtestnzcps, "iV4fV4f", "ncV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vtestzpd256, "iV4dV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vtestcpd256, "iV4dV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vtestnzcpd256, "iV4dV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vtestzps256, "iV8fV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vtestcps256, "iV8fV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vtestnzcps256, "iV8fV8f", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_ptestz256, "iV4LLiV4LLi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_ptestc256, "iV4LLiV4LLi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_ptestnzc256, "iV4LLiV4LLi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_movmskpd256, "iV4d", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_movmskps256, "iV8f", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_vzeroall, "v", "n", "avx")
 TARGET_BUILTIN(__builtin_ia32_vzeroupper, "v", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2LLi", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4LLi", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskloadps256, "V8fV8fC*V8i", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2LLiV2d", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4iV4f", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4LLiV4d", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8iV8f", "n", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v32qi, "cV32cIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v16hi, "sV16sIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v8si, "iV8iIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v32qi, "V32cV32ccIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v16hi, "V16sV16ssIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_lddqu256, "V32ccC*", "nV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskloadpd, "V2dV2dC*V2LLi", "nV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskloadps, "V4fV4fC*V4i", "nV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskloadpd256, "V4dV4dC*V4LLi", "nV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskloadps256, "V8fV8fC*V8i", "nV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskstorepd, "vV2d*V2LLiV2d", "nV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskstoreps, "vV4f*V4iV4f", "nV:128:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskstorepd256, "vV4d*V4LLiV4d", "nV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_maskstoreps256, "vV8f*V8iV8f", "nV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v32qi, "cV32cIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v16hi, "sV16sIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v8si, "iV8iIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v32qi, "V32cV32ccIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v16hi, "V16sV16ssIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v8si, "V8iV8iiIi", "ncV:256:", "avx")
 
 // AVX2
-TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_packusdw256, "V16sV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_paddsb256, "V32cV32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_paddsw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psubsb256, "V32cV32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psubsw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_paddusb256, "V32cV32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_paddusw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psubusb256, "V32cV32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pblendw256, "V16sV16sV16sIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phaddsw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phsubw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16sV32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaddwd256, "V8iV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxub256, "V32cV32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxuw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxud256, "V8iV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxsb256, "V32cV32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxsw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmaxsd256, "V8iV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminub256, "V32cV32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminuw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminud256, "V8iV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminsb256, "V32cV32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmulhw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pmuludq256, "V4LLiV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psadbw256, "V4LLiV32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pshufb256, "V32cV32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pshufd256, "V8iV8iIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pshuflw256, "V16sV16sIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pshufhw256, "V16sV16sIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psignb256, "V32cV32cV32c", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psignw256, "V16sV16sV16s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psignd256, "V8iV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllwi256, "V16sV16si", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllw256, "V16sV16sV8s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pslldqi256_byteshift, "V4LLiV4LLiIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pslldi256, "V8iV8ii", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pslld256, "V8iV8iV4i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllqi256, "V4LLiV4LLii", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllq256, "V4LLiV4LLiV2LLi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrawi256, "V16sV16si", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psraw256, "V16sV16sV8s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psradi256, "V8iV8ii", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrad256, "V8iV8iV4i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrldqi256_byteshift, "V4LLiV4LLiIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlwi256, "V16sV16si", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlw256, "V16sV16sV8s", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrldi256, "V8iV8ii", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrld256, "V8iV8iV4i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4LLiV4LLii", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pblendd128, "V4iV4iV4iIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_pblendd256, "V8iV8iV8iIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_permdf256, "V4dV4dIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_permdi256, "V4LLiV4LLiIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskloadq256, "V4LLiV4LLiC*V4LLi", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskloadd, "V4iV4iC*V4i", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskloadq, "V2LLiV2LLiC*V2LLi", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskstored256, "vV8i*V8iV8i", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskstoreq256, "vV4LLi*V4LLiV4LLi", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskstored, "vV4i*V4iV4i", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_maskstoreq, "vV2LLi*V2LLiV2LLi", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllv8si, "V8iV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllv4si, "V4iV4iV4i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllv4di, "V4LLiV4LLiV4LLi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psllv2di, "V2LLiV2LLiV2LLi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrav8si, "V8iV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrav4si, "V4iV4iV4i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlv8si, "V8iV8iV8i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlv4si, "V4iV4iV4i", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlv4di, "V4LLiV4LLiV4LLi", "nc", "avx2")
-TARGET_BUILTIN(__builtin_ia32_psrlv2di, "V2LLiV2LLiV2LLi", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_mpsadbw256, "V32cV32cV32cIc", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pabsb256, "V32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pabsw256, "V16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pabsd256, "V8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_packsswb256, "V32cV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_packssdw256, "V16sV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_packuswb256, "V32cV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_packusdw256, "V16sV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_paddsb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_paddsw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psubsb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psubsw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_paddusb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_paddusw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psubusb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pblendw256, "V16sV16sV16sIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_phaddsw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_phsubw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_phsubd256, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_phsubsw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaddubsw256, "V16sV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaddwd256, "V8iV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaxub256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaxuw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaxud256, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaxsb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaxsw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmaxsd256, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pminub256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pminuw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pminud256, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pminsb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pminsw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pminsd256, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmovmskb256, "iV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmuldq256, "V4LLiV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmulhrsw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmulhuw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmulhw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pmuludq256, "V4LLiV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psadbw256, "V4LLiV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pshufb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pshufd256, "V8iV8iIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pshuflw256, "V16sV16sIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pshufhw256, "V16sV16sIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psignb256, "V32cV32cV32c", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psignw256, "V16sV16sV16s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psignd256, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psllwi256, "V16sV16si", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psllw256, "V16sV16sV8s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pslldqi256_byteshift, "V4LLiV4LLiIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pslldi256, "V8iV8ii", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pslld256, "V8iV8iV4i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psllqi256, "V4LLiV4LLii", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psllq256, "V4LLiV4LLiV2LLi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrawi256, "V16sV16si", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psraw256, "V16sV16sV8s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psradi256, "V8iV8ii", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrad256, "V8iV8iV4i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrldqi256_byteshift, "V4LLiV4LLiIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrlwi256, "V16sV16si", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrlw256, "V16sV16sV8s", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrldi256, "V8iV8ii", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrld256, "V8iV8iV4i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4LLiV4LLii", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pblendd128, "V4iV4iV4iIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pblendd256, "V8iV8iV8iIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_permdf256, "V4dV4dIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_permdi256, "V4LLiV4LLiIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_extract128i256, "V2LLiV4LLiIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_insert128i256, "V4LLiV4LLiV2LLiIi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_maskloadd256, "V8iV8iC*V8i", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_maskloadq256, "V4LLiV4LLiC*V4LLi", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_maskloadd, "V4iV4iC*V4i", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_maskloadq, "V2LLiV2LLiC*V2LLi", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_maskstored256, "vV8i*V8iV8i", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_maskstoreq256, "vV4LLi*V4LLiV4LLi", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_maskstored, "vV4i*V4iV4i", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_maskstoreq, "vV2LLi*V2LLiV2LLi", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psllv8si, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psllv4si, "V4iV4iV4i", "ncV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psllv4di, "V4LLiV4LLiV4LLi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psllv2di, "V2LLiV2LLiV2LLi", "ncV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrav8si, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrav4si, "V4iV4iV4i", "ncV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrlv8si, "V8iV8iV8i", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrlv4si, "V4iV4iV4i", "ncV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrlv4di, "V4LLiV4LLiV4LLi", "ncV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_psrlv2di, "V2LLiV2LLiV2LLi", "ncV:128:", "avx2")
 
 // GATHER
-TARGET_BUILTIN(__builtin_ia32_gatherd_pd, "V2dV2ddC*V4iV2dIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherd_pd256, "V4dV4ddC*V4iV4dIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_pd, "V2dV2ddC*V2LLiV2dIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_pd256, "V4dV4ddC*V4LLiV4dIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherd_ps, "V4fV4ffC*V4iV4fIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherd_ps256, "V8fV8ffC*V8iV8fIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_ps, "V4fV4ffC*V2LLiV4fIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_ps256, "V4fV4ffC*V4LLiV4fIc", "n", "avx2")
-
-TARGET_BUILTIN(__builtin_ia32_gatherd_q, "V2LLiV2LLiLLiC*V4iV2LLiIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherd_q256, "V4LLiV4LLiLLiC*V4iV4LLiIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_q, "V2LLiV2LLiLLiC*V2LLiV2LLiIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_q256, "V4LLiV4LLiLLiC*V4LLiV4LLiIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherd_d, "V4iV4iiC*V4iV4iIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherd_d256, "V8iV8iiC*V8iV8iIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_d, "V4iV4iiC*V2LLiV4iIc", "n", "avx2")
-TARGET_BUILTIN(__builtin_ia32_gatherq_d256, "V4iV4iiC*V4LLiV4iIc", "n", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherd_pd, "V2dV2ddC*V4iV2dIc", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherd_pd256, "V4dV4ddC*V4iV4dIc", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_pd, "V2dV2ddC*V2LLiV2dIc", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_pd256, "V4dV4ddC*V4LLiV4dIc", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherd_ps, "V4fV4ffC*V4iV4fIc", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherd_ps256, "V8fV8ffC*V8iV8fIc", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_ps, "V4fV4ffC*V2LLiV4fIc", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_ps256, "V4fV4ffC*V4LLiV4fIc", "nV:256:", "avx2")
+
+TARGET_BUILTIN(__builtin_ia32_gatherd_q, "V2LLiV2LLiLLiC*V4iV2LLiIc", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherd_q256, "V4LLiV4LLiLLiC*V4iV4LLiIc", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_q, "V2LLiV2LLiLLiC*V2LLiV2LLiIc", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_q256, "V4LLiV4LLiLLiC*V4LLiV4LLiIc", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherd_d, "V4iV4iiC*V4iV4iIc", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherd_d256, "V8iV8iiC*V8iV8iIc", "nV:256:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_d, "V4iV4iiC*V2LLiV4iIc", "nV:128:", "avx2")
+TARGET_BUILTIN(__builtin_ia32_gatherq_d256, "V4iV4iiC*V4LLiV4iIc", "nV:256:", "avx2")
 
 // F16C
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ph, "V8sV4fIi", "nc", "f16c")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256, "V8sV8fIi", "nc", "f16c")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ps, "V4fV8s", "nc", "f16c")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256, "V8fV8s", "nc", "f16c")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ph, "V8sV4fIi", "ncV:128:", "f16c")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256, "V8sV8fIi", "ncV:256:", "f16c")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ps, "V4fV8s", "ncV:128:", "f16c")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256, "V8fV8s", "ncV:256:", "f16c")
 
 // RDRAND
 TARGET_BUILTIN(__builtin_ia32_rdrand16_step, "UiUs*", "n", "rdrnd")
@@ -745,109 +745,109 @@
 TARGET_BUILTIN(__builtin_ia32_lwpval32, "vUiUiUi", "n", "lwp")
 
 // SHA
-TARGET_BUILTIN(__builtin_ia32_sha1rnds4, "V4iV4iV4iIc", "nc", "sha")
-TARGET_BUILTIN(__builtin_ia32_sha1nexte, "V4iV4iV4i", "nc", "sha")
-TARGET_BUILTIN(__builtin_ia32_sha1msg1, "V4iV4iV4i", "nc", "sha")
-TARGET_BUILTIN(__builtin_ia32_sha1msg2, "V4iV4iV4i", "nc", "sha")
-TARGET_BUILTIN(__builtin_ia32_sha256rnds2, "V4iV4iV4iV4i", "nc", "sha")
-TARGET_BUILTIN(__builtin_ia32_sha256msg1, "V4iV4iV4i", "nc", "sha")
-TARGET_BUILTIN(__builtin_ia32_sha256msg2, "V4iV4iV4i", "nc", "sha")
+TARGET_BUILTIN(__builtin_ia32_sha1rnds4, "V4iV4iV4iIc", "ncV:128:", "sha")
+TARGET_BUILTIN(__builtin_ia32_sha1nexte, "V4iV4iV4i", "ncV:128:", "sha")
+TARGET_BUILTIN(__builtin_ia32_sha1msg1, "V4iV4iV4i", "ncV:128:", "sha")
+TARGET_BUILTIN(__builtin_ia32_sha1msg2, "V4iV4iV4i", "ncV:128:", "sha")
+TARGET_BUILTIN(__builtin_ia32_sha256rnds2, "V4iV4iV4iV4i", "ncV:128:", "sha")
+TARGET_BUILTIN(__builtin_ia32_sha256msg1, "V4iV4iV4i", "ncV:128:", "sha")
+TARGET_BUILTIN(__builtin_ia32_sha256msg2, "V4iV4iV4i", "ncV:128:", "sha")
 
 // FMA
-TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "nc", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "nc", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "nc", "fma")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "nc", "fma")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "nc", "fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "nc", "fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "nc", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "nc", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "nc", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "nc", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "nc", "fma|fma4")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "nc", "fma|fma4")
-
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_maskz, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask3, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmsubpd512_mask3, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps512_maskz, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask3, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmsubps512_mask3, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_maskz, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask3, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd512_mask3, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_maskz, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask3, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmsubaddps512_mask3, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps, "V4fV4fV4fV4f", "ncV:128:", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd, "V2dV2dV2dV2d", "ncV:128:", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss3, "V4fV4fV4fV4f", "ncV:128:", "fma")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd3, "V2dV2dV2dV2d", "ncV:128:", "fma")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss, "V4fV4fV4fV4f", "ncV:128:", "fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd, "V2dV2dV2dV2d", "ncV:128:", "fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps, "V4fV4fV4fV4f", "ncV:128:", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd, "V2dV2dV2dV2d", "ncV:128:", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps256, "V8fV8fV8fV8f", "ncV:256:", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd256, "V4dV4dV4dV4d", "ncV:256:", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256, "V8fV8fV8fV8f", "ncV:256:", "fma|fma4")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256, "V4dV4dV4dV4d", "ncV:256:", "fma|fma4")
+
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_maskz, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmsubpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmsubps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_maskz, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd512_mask3, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_maskz, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddps512_mask3, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
 
 // XOP
-TARGET_BUILTIN(__builtin_ia32_vpmacssww, "V8sV8sV8sV8s", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacsww, "V8sV8sV8sV8s", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacsswd, "V4iV8sV8sV4i", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacswd, "V4iV8sV8sV4i", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacssdd, "V4iV4iV4iV4i", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacsdd, "V4iV4iV4iV4i", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacssdql, "V2LLiV4iV4iV2LLi", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacsdql, "V2LLiV4iV4iV2LLi", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacssdqh, "V2LLiV4iV4iV2LLi", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmacsdqh, "V2LLiV4iV4iV2LLi", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmadcsswd, "V4iV8sV8sV4i", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpmadcswd, "V4iV8sV8sV4i", "nc", "xop")
-
-TARGET_BUILTIN(__builtin_ia32_vphaddbw, "V8sV16c", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddbd, "V4iV16c", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddbq, "V2LLiV16c", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddwd, "V4iV8s", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddwq, "V2LLiV8s", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphadddq, "V2LLiV4i", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddubw, "V8sV16c", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddubd, "V4iV16c", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddubq, "V2LLiV16c", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphadduwd, "V4iV8s", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphadduwq, "V2LLiV8s", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphaddudq, "V2LLiV4i", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphsubbw, "V8sV16c", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphsubwd, "V4iV8s", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vphsubdq, "V2LLiV4i", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpperm, "V16cV16cV16cV16c", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotb, "V16cV16cV16c", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotw, "V8sV8sV8s", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotd, "V4iV4iV4i", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotq, "V2LLiV2LLiV2LLi", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotbi, "V16cV16cIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotwi, "V8sV8sIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotdi, "V4iV4iIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vprotqi, "V2LLiV2LLiIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshlb, "V16cV16cV16c", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshlw, "V8sV8sV8s", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshld, "V4iV4iV4i", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshlq, "V2LLiV2LLiV2LLi", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshab, "V16cV16cV16c", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshaw, "V8sV8sV8s", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshad, "V4iV4iV4i", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpshaq, "V2LLiV2LLiV2LLi", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomub, "V16cV16cV16cIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomuw, "V8sV8sV8sIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomud, "V4iV4iV4iIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomuq, "V2LLiV2LLiV2LLiIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomb, "V16cV16cV16cIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomw, "V8sV8sV8sIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomd, "V4iV4iV4iIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpcomq, "V2LLiV2LLiV2LLiIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpermil2pd, "V2dV2dV2dV2LLiIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpermil2pd256, "V4dV4dV4dV4LLiIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpermil2ps, "V4fV4fV4fV4iIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vpermil2ps256, "V8fV8fV8fV8iIc", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vfrczss, "V4fV4f", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vfrczsd, "V2dV2d", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vfrczps, "V4fV4f", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vfrczpd, "V2dV2d", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vfrczps256, "V8fV8f", "nc", "xop")
-TARGET_BUILTIN(__builtin_ia32_vfrczpd256, "V4dV4d", "nc", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacssww, "V8sV8sV8sV8s", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacsww, "V8sV8sV8sV8s", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacsswd, "V4iV8sV8sV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacswd, "V4iV8sV8sV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacssdd, "V4iV4iV4iV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacsdd, "V4iV4iV4iV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacssdql, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacsdql, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacssdqh, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmacsdqh, "V2LLiV4iV4iV2LLi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmadcsswd, "V4iV8sV8sV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpmadcswd, "V4iV8sV8sV4i", "ncV:128:", "xop")
+
+TARGET_BUILTIN(__builtin_ia32_vphaddbw, "V8sV16c", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphaddbd, "V4iV16c", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphaddbq, "V2LLiV16c", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphaddwd, "V4iV8s", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphaddwq, "V2LLiV8s", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphadddq, "V2LLiV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphaddubw, "V8sV16c", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphaddubd, "V4iV16c", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphaddubq, "V2LLiV16c", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphadduwd, "V4iV8s", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphadduwq, "V2LLiV8s", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphaddudq, "V2LLiV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphsubbw, "V8sV16c", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphsubwd, "V4iV8s", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vphsubdq, "V2LLiV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpperm, "V16cV16cV16cV16c", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vprotb, "V16cV16cV16c", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vprotw, "V8sV8sV8s", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vprotd, "V4iV4iV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vprotq, "V2LLiV2LLiV2LLi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vprotbi, "V16cV16cIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vprotwi, "V8sV8sIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vprotdi, "V4iV4iIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vprotqi, "V2LLiV2LLiIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpshlb, "V16cV16cV16c", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpshlw, "V8sV8sV8s", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpshld, "V4iV4iV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpshlq, "V2LLiV2LLiV2LLi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpshab, "V16cV16cV16c", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpshaw, "V8sV8sV8s", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpshad, "V4iV4iV4i", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpshaq, "V2LLiV2LLiV2LLi", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpcomub, "V16cV16cV16cIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpcomuw, "V8sV8sV8sIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpcomud, "V4iV4iV4iIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpcomuq, "V2LLiV2LLiV2LLiIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpcomb, "V16cV16cV16cIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpcomw, "V8sV8sV8sIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpcomd, "V4iV4iV4iIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpcomq, "V2LLiV2LLiV2LLiIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpermil2pd, "V2dV2dV2dV2LLiIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpermil2pd256, "V4dV4dV4dV4LLiIc", "ncV:256:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpermil2ps, "V4fV4fV4fV4iIc", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vpermil2ps256, "V8fV8fV8fV8iIc", "ncV:256:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vfrczss, "V4fV4f", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vfrczsd, "V2dV2d", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vfrczps, "V4fV4f", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vfrczpd, "V2dV2d", "ncV:128:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vfrczps256, "V8fV8f", "ncV:256:", "xop")
+TARGET_BUILTIN(__builtin_ia32_vfrczpd256, "V4dV4d", "ncV:256:", "xop")
 
 TARGET_BUILTIN(__builtin_ia32_xbegin, "i", "n", "rtm")
 TARGET_BUILTIN(__builtin_ia32_xend, "v", "n", "rtm")
@@ -866,876 +866,876 @@
 TARGET_BUILTIN(__builtin_ia32_wrpkru, "vUi", "n", "pku")
 
 // AVX-512
-TARGET_BUILTIN(__builtin_ia32_sqrtpd512, "V8dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_sqrtps512, "V16fV16fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14sd_mask, "V2dV2dV2dV2dUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "nc", "avx512f")
-
-TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "nc", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "nc", "avx512er")
-
-TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "nc", "avx512f")
-
-TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "nc", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "nc", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "nc", "avx512er")
-TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "nc", "avx512er")
-
-TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2udq512_mask, "V8iV8dV8iUcIi", "nc", "avx512f")
-
-TARGET_BUILTIN(__builtin_ia32_cmpps512_mask,   "UsV16fV16fIiUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cmpps256_mask,   "UcV8fV8fIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpps128_mask,   "UcV4fV4fIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmppd512_mask, "UcV8dV8dIiUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cmppd256_mask, "UcV4dV4dIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmppd128_mask, "UcV2dV2dIiUc", "nc", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_rndscaleps_mask, "V16fV16fIiV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rndscalepd_mask, "V8dV8dIiV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtps2dq512_mask, "V16iV16fV16iUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq512_mask, "V8iV8dV8iUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtps2udq512_mask, "V16iV16fV16iUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2udq512_mask, "V8iV8dV8iUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_minps512, "V16fV16fV16fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_minpd512, "V8dV8dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_maxps512, "V16fV16fV16fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_maxpd512, "V8dV8dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8LLiV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8LLiV8LLiV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8LLiV8LLiV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminsd512, "V16iV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8LLiV8LLiV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminud512, "V16iV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8LLiV8LLiV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8LLiV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLiLLiC*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loadups512_mask, "V16ffC*V16fUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loadaps512_mask, "V16fV16fC*V16fUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loadupd512_mask, "V8ddC*V8dUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loadapd512_mask, "V8dV8dC*V8dUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storedqudi512_mask, "vLLi*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storedqusi512_mask, "vi*V16iUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storeupd512_mask, "vd*V8dUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storeapd512_mask, "vV8d*V8dUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_alignq512, "V8LLiV8LLiV8LLiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_alignd512, "V16iV16iV16iIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_alignd128, "V4iV4iV4iIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_alignd256, "V8iV8iV8iIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_alignq128, "V2LLiV2LLiV2LLiIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_alignq256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "nc", "avx512f")
-
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "nc", "avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "nc", "avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "nc", "avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni")
-TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "nc", "avx512vnni")
-
-TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4ddC*V4LLiUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div4di, "V4LLiV4LLiLLiC*V4LLiUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div4sf, "V4fV4ffC*V2LLiUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div4si, "V4iV4iiC*V2LLiUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div8sf, "V4fV4ffC*V4LLiUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3div8si, "V4iV4iiC*V4LLiUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv2df, "V2dV2ddC*V4iUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv2di, "V2LLiV2LLiLLiC*V4iUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv4df, "V4dV4ddC*V4iUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv4di, "V4LLiV4LLiLLiC*V4iUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv4sf, "V4fV4ffC*V4iUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv4si, "V4iV4iiC*V4iUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv8sf, "V8fV8ffC*V8iUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gather3siv8si, "V8iV8iiC*V8iUcIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8ddC*V8iUcIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16ffC*V16fUsIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8ddC*V8LLiUcIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv16sf, "V8fV8ffC*V8LLiUcIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gathersiv8di, "V8LLiV8LLiLLiC*V8iUcIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gathersiv16si, "V16iV16iiC*V16iUsIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv8di, "V8LLiV8LLiLLiC*V8LLiUcIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_gatherdiv16si, "V8iV8iiC*V8LLiUcIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scattersiv8df, "vd*UcV8iV8dIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scattersiv16sf, "vf*UsV16iV16fIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8df,  "vd*UcV8LLiV8dIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv16sf, "vf*UcV8LLiV8fIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scattersiv8di,  "vLLi*UcV8iV8LLiIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vi*UsV16iV16iIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8di,  "vLLi*UcV8LLiV8LLiIi", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vi*UcV8LLiV8iIi", "n", "avx512f")
-
-TARGET_BUILTIN(__builtin_ia32_gatherpfdpd,  "vUcV8iLLiC*IiIi", "n", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_gatherpfdps,  "vUsV16iiC*IiIi", "n", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_gatherpfqpd,  "vUcV8LLiLLiC*IiIi", "n", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_gatherpfqps,  "vUcV8LLiiC*IiIi", "n", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iLLi*IiIi", "n", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16ii*IiIi", "n", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiLLi*IiIi", "n", "avx512pf")
-TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLii*IiIi", "n", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_sqrtpd512, "V8dV8dIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_sqrtps512, "V16fV16fIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rsqrt14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rsqrt14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rsqrt14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rsqrt14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f")
+
+TARGET_BUILTIN(__builtin_ia32_rsqrt28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rsqrt28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er")
+
+TARGET_BUILTIN(__builtin_ia32_rcp14sd_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rcp14ss_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rcp14pd512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rcp14ps512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f")
+
+TARGET_BUILTIN(__builtin_ia32_rcp28sd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rcp28ss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rcp28pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_rcp28ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_exp2pd_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512er")
+TARGET_BUILTIN(__builtin_ia32_exp2ps_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512er")
+
+TARGET_BUILTIN(__builtin_ia32_cvttps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvttps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2udq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f")
+
+TARGET_BUILTIN(__builtin_ia32_cmpps512_mask,   "UsV16fV16fIiUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cmpps256_mask,   "UcV8fV8fIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cmpps128_mask,   "UcV4fV4fIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cmppd512_mask, "UcV8dV8dIiUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cmppd256_mask, "UcV4dV4dIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cmppd128_mask, "UcV2dV2dIiUc", "ncV:128:", "avx512vl")
+
+TARGET_BUILTIN(__builtin_ia32_rndscaleps_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rndscalepd_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtps2dq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2dq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtps2udq512_mask, "V16iV16fV16iUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2udq512_mask, "V8iV8dV8iUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_minps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_minpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_maxps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_maxpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtdq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtudq2ps512_mask, "V16fV16iV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2ps512_mask, "V8fV8dV8fUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ph512_mask, "V16sV16fIiV16sUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ps512_mask, "V16fV16sV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pabsd512, "V16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pabsq512, "V8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmaxsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmaxsq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmaxud512, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmaxuq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pminsd512, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pminsq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pminud512, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pminuq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmuldq512, "V8LLiV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmuludq512, "V8LLiV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loaddqusi512_mask, "V16iiC*V16iUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loaddqudi512_mask, "V8LLiLLiC*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loadups512_mask, "V16ffC*V16fUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loadaps512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loadupd512_mask, "V8ddC*V8dUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loadapd512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_storedqudi512_mask, "vLLi*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_storedqusi512_mask, "vi*V16iUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_storeupd512_mask, "vd*V8dUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_storeapd512_mask, "vV8d*V8dUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_alignq512, "V8LLiV8LLiV8LLiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_alignd512, "V16iV16iV16iIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_alignd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignq128, "V2LLiV2LLiV2LLiIi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignq256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extractf64x4_mask, "V4dV8dIiV4dUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_extractf32x4_mask, "V4fV16fIiV4fUc", "ncV:512:", "avx512f")
+
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpbusds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssd512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl,avx512vnni")
+TARGET_BUILTIN(__builtin_ia32_vpdpwssds512, "V16iV16iV16iV16i", "ncV:512:", "avx512vnni")
+
+TARGET_BUILTIN(__builtin_ia32_gather3div2df, "V2dV2ddC*V2LLiUcIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div2di, "V2LLiV2LLiLLiC*V2LLiUcIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div4df, "V4dV4ddC*V4LLiUcIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div4di, "V4LLiV4LLiLLiC*V4LLiUcIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div4sf, "V4fV4ffC*V2LLiUcIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div4si, "V4iV4iiC*V2LLiUcIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div8sf, "V4fV4ffC*V4LLiUcIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3div8si, "V4iV4iiC*V4LLiUcIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3siv2df, "V2dV2ddC*V4iUcIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3siv2di, "V2LLiV2LLiLLiC*V4iUcIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3siv4df, "V4dV4ddC*V4iUcIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3siv4di, "V4LLiV4LLiLLiC*V4iUcIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3siv4sf, "V4fV4ffC*V4iUcIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3siv4si, "V4iV4iiC*V4iUcIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3siv8sf, "V8fV8ffC*V8iUcIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gather3siv8si, "V8iV8iiC*V8iUcIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_gathersiv8df, "V8dV8ddC*V8iUcIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_gathersiv16sf, "V16fV16ffC*V16fUsIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_gatherdiv8df, "V8dV8ddC*V8LLiUcIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_gatherdiv16sf, "V8fV8ffC*V8LLiUcIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_gathersiv8di, "V8LLiV8LLiLLiC*V8iUcIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_gathersiv16si, "V16iV16iiC*V16iUsIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_gatherdiv8di, "V8LLiV8LLiLLiC*V8LLiUcIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_gatherdiv16si, "V8iV8iiC*V8LLiUcIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scattersiv8df, "vd*UcV8iV8dIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scattersiv16sf, "vf*UsV16iV16fIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv8df,  "vd*UcV8LLiV8dIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv16sf, "vf*UcV8LLiV8fIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scattersiv8di,  "vLLi*UcV8iV8LLiIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scattersiv16si, "vi*UsV16iV16iIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv8di,  "vLLi*UcV8LLiV8LLiIi", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv16si, "vi*UcV8LLiV8iIi", "nV:512:", "avx512f")
+
+TARGET_BUILTIN(__builtin_ia32_gatherpfdpd,  "vUcV8iLLiC*IiIi", "nV:512:", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_gatherpfdps,  "vUsV16iiC*IiIi", "nV:512:", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_gatherpfqpd,  "vUcV8LLiLLiC*IiIi", "nV:512:", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_gatherpfqps,  "vUcV8LLiiC*IiIi", "nV:512:", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_scatterpfdpd, "vUcV8iLLi*IiIi", "nV:512:", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_scatterpfdps, "vUsV16ii*IiIi", "nV:512:", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_scatterpfqpd, "vUcV8LLiLLi*IiIi", "nV:512:", "avx512pf")
+TARGET_BUILTIN(__builtin_ia32_scatterpfqps, "vUcV8LLii*IiIi", "nV:512:", "avx512pf")
 
 TARGET_BUILTIN(__builtin_ia32_knothi, "UsUs", "nc", "avx512f")
 
-TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpq128_mask, "UcV2LLiV2LLiIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpw128_mask, "UcV8sV8sIiUc", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cmpb256_mask, "UiV32cV32cIiUi", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cmpd256_mask, "UcV8iV8iIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpq256_mask, "UcV4LLiV4LLiIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cmpw256_mask, "UsV16sV16sIiUs", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cmpb512_mask, "ULLiV64cV64cIiULLi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cmpd512_mask, "UsV16iV16iIiUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cmpq512_mask, "UcV8LLiV8LLiIiUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cmpw512_mask, "UiV32sV32sIiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ucmpb128_mask, "UsV16cV16cIiUs", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ucmpd128_mask, "UcV4iV4iIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ucmpq128_mask, "UcV2LLiV2LLiIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ucmpw128_mask, "UcV8sV8sIiUc", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ucmpb256_mask, "UiV32cV32cIiUi", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ucmpd256_mask, "UcV8iV8iIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ucmpq256_mask, "UcV4LLiV4LLiIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_ucmpw256_mask, "UsV16sV16sIiUs", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ucmpb512_mask, "ULLiV64cV64cIiULLi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8LLiV8LLiIiUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "nc", "avx512bw")
-
-TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_paddsb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_paddsw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_paddusb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_paddusw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaxuw512, "V32sV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pminsb512, "V64cV64cV64c", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pminsw512, "V32sV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psubsb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psubsw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psubusb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psubusw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512bw")
-
-TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128_mask, "V2LLiV2LLiV2LLiUc", "nc", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256_mask, "V4LLiV4LLiV4LLiUc", "nc", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpconflictsi_128_mask, "V4iV4iV4iUc", "nc", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpconflictsi_256_mask, "V8iV8iV8iUc", "nc", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpconflictdi_512_mask, "V8LLiV8LLiV8LLiUc", "nc", "avx512cd")
-TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512_mask, "V16iV16iV16iUs", "nc", "avx512cd")
-TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "nc", "avx512cd")
-TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8LLiV8LLi", "nc", "avx512cd")
-
-TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "nc", "avx512vpopcntdq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2LLiV2LLi", "nc", "avx512vpopcntdq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpopcntd_256, "V8iV8i", "nc", "avx512vpopcntdq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "nc", "avx512vpopcntdq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "nc", "avx512vpopcntdq")
-TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "nc", "avx512vpopcntdq")
-
-TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "nc", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "nc", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "nc", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "nc", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "nc", "avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "nc", "avx512bitalg")
-
-TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "nc", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "nc", "avx512vl,avx512bitalg")
-TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "nc", "avx512bitalg")
-
-TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "nc", "avx512bw")
-
-TARGET_BUILTIN(__builtin_ia32_addpd512, "V8dV8dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_addps512, "V16fV16fV16fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_divpd512, "V8dV8dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_divps512, "V16fV16fV16fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_mulpd512, "V8dV8dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_mulps512, "V16fV16fV16fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_subpd512, "V8dV8dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_subps512, "V16fV16fV16fIi", "nc", "avx512f")
-
-TARGET_BUILTIN(__builtin_ia32_pmaddubsw512, "V32sV64cV64c", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmaddwd512, "V16iV32sV32s", "nc", "avx512bw")
-
-TARGET_BUILTIN(__builtin_ia32_addss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_divss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_mulss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_subss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_maxss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_minss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_addsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_divsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_mulsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_subsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_maxsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_minsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-
-TARGET_BUILTIN(__builtin_ia32_compressdf128_mask, "V2dV2dV2dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressdf256_mask, "V4dV4dV4dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressdi128_mask, "V2LLiV2LLiV2LLiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressdi256_mask, "V4LLiV4LLiV4LLiUc", "nc", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_compresshi128_mask, "V8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compresshi256_mask, "V16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressqi128_mask, "V16cV16cV16cUs", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressqi256_mask, "V32cV32cV32cUi", "nc", "avx512vl,avx512vbmi2")
-
-TARGET_BUILTIN(__builtin_ia32_compresssf128_mask, "V4fV4fV4fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compresssf256_mask, "V8fV8fV8fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compresssi128_mask, "V4iV4iV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compresssi256_mask, "V8iV8iV8iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoredf128_mask, "vV2d*V2dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoredf256_mask, "vV4d*V4dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoredi128_mask, "vV2LLi*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoredi256_mask, "vV4LLi*V4LLiUc", "n", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_compressstorehi128_mask, "vV8s*V8sUc", "n", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressstorehi256_mask, "vV16s*V16sUs", "n", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressstoreqi128_mask, "vV16c*V16cUs", "n", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressstoreqi256_mask, "vV32c*V32cUi", "n", "avx512vl,avx512vbmi2")
-
-TARGET_BUILTIN(__builtin_ia32_compressstoresf128_mask, "vV4f*V4fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2ps_mask, "V4fV2dV4fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2udq128_mask, "V4iV2dV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2udq256_mask, "V4iV4dV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtps2udq128_mask, "V4iV4fV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtps2udq256_mask, "V8iV8fV8iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2dq128_mask, "V4iV2dV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2udq128_mask, "V4iV2dV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2udq256_mask, "V4iV4dV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2LLiV2LLiV2LLiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expanddi256_mask, "V4LLiV4LLiV4LLiUc", "nc", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_expandhi128_mask, "V8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandhi256_mask, "V16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandqi128_mask, "V16cV16cV16cUs", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandqi256_mask, "V32cV32cV32cUi", "nc", "avx512vl,avx512vbmi2")
-
-TARGET_BUILTIN(__builtin_ia32_expandloaddf128_mask, "V2dV2dC*V2dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloaddf256_mask, "V4dV4dC*V4dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloaddi128_mask, "V4iV2LLiC*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloaddi256_mask, "V4LLiV4LLiC*V4LLiUc", "n", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_expandloadhi128_mask, "V8sV8sC*V8sUc", "n", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandloadhi256_mask, "V16sV16sC*V16sUs", "n", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandloadqi128_mask, "V16cV16cC*V16cUs", "n", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandloadqi256_mask, "V32cV32cC*V32cUi", "n", "avx512vl,avx512vbmi2")
-
-TARGET_BUILTIN(__builtin_ia32_expandloadsf128_mask, "V4fV4fC*V4fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloadsf256_mask, "V8fV8fC*V8fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloadsi128_mask, "V4iV4iC*V4iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandloadsi256_mask, "V8iV8iC*V8iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandsf128_mask, "V4fV4fV4fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandsf256_mask, "V8fV8fV8fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandsi128_mask, "V4iV4iV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_expandsi256_mask, "V8iV8iV8iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2LLiV2LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4LLiV4LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rndscaleps_256_mask, "V8fV8fIiV8fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scalefpd128_mask, "V2dV2dV2dV2dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scalefpd256_mask, "V4dV4dV4dV4dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scalefps128_mask, "V4fV4fV4fV4fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scalefps256_mask, "V8fV8fV8fV8fUc", "nc", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_scatterdiv2df, "vd*UcV2LLiV2dIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv2di, "vLLi*UcV2LLiV2LLiIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv4df, "vd*UcV4LLiV4dIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv4di, "vLLi*UcV4LLiV4LLiIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv4sf, "vf*UcV2LLiV4fIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv4si, "vi*UcV2LLiV4iIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8sf, "vf*UcV4LLiV4fIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scatterdiv8si, "vi*UcV4LLiV4iIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv2df, "vd*UcV4iV2dIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv2di, "vLLi*UcV4iV2LLiIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv4df, "vd*UcV4iV4dIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv4di, "vLLi*UcV4iV4LLiIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv4sf, "vf*UcV4iV4fIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv4si, "vi*UcV4iV4iIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv8sf, "vf*UcV8iV8fIi", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_scattersiv8si, "vi*UcV8iV8iIi", "n", "avx512vl")
-
-TARGET_BUILTIN(__builtin_ia32_vpermi2vard128, "V4iV4iV4iV4i", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2vard256, "V8iV8iV8iV8i", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2vard512, "V16iV16iV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varpd128, "V2dV2dV2LLiV2d", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varpd256, "V4dV4dV4LLiV4d", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512, "V8dV8dV8LLiV8d", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varps128, "V4fV4fV4iV4f", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varps256, "V8fV8fV8iV8f", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varps512, "V16fV16fV16iV16f", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varq128, "V2LLiV2LLiV2LLiV2LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varq256, "V4LLiV4LLiV4LLiV4LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varq512, "V8LLiV8LLiV8LLiV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varqi128, "V16cV16cV16cV16c", "nc", "avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varqi256, "V32cV32cV32cV32c", "nc", "avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varqi512, "V64cV64cV64cV64c", "nc", "avx512vbmi")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128, "V8sV8sV8sV8s", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256, "V16sV16sV16sV16s", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_vpermi2varhi512, "V32sV32sV32sV32s", "nc", "avx512bw")
-
-TARGET_BUILTIN(__builtin_ia32_vpshldd128, "V4iV4iV4iIi", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldd256, "V8iV8iV8iIi", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldd512, "V16iV16iV16iIi", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldq128, "V2LLiV2LLiV2LLiIi", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldq256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldq512, "V8LLiV8LLiV8LLiIi", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldw128, "V8sV8sV8sIi", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldw256, "V16sV16sV16sIi", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldw512, "V32sV32sV32sIi", "nc", "avx512vbmi2")
-
-TARGET_BUILTIN(__builtin_ia32_vpshldvd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvd512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvw128_mask, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvw256_mask, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvd128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvd256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvd512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvw128_maskz, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvw256_maskz, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshldvw512_maskz, "V32sV32sV32sV32sUi", "nc", "avx512vbmi2")
-
-TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_mask, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_mask, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_mask, "V16iV16iV16iV16iUs", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_mask, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_mask, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_mask, "V32sV32sV32sV32sUi", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_maskz, "V4iV4iV4iV4iUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_maskz, "V8iV8iV8iV8iUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_maskz, "V16iV16iV16iV16iUs", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_maskz, "V8sV8sV8sV8sUc", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "nc", "avx512vbmi2")
-
-TARGET_BUILTIN(__builtin_ia32_vpshrdd128, "V4iV4iV4iIi", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdd256, "V8iV8iV8iIi", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdd512, "V16iV16iV16iIi", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq128, "V2LLiV2LLiV2LLiIi", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdq512, "V8LLiV8LLiV8LLiIi", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdw128, "V8sV8sV8sIi", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdw256, "V16sV16sV16sIi", "nc", "avx512vl,avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_vpshrdw512, "V32sV32sV32sIi", "nc", "avx512vbmi2")
-
-TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2qq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2qq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2uqq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2uqq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2ps128_mask, "V4fV2LLiV4fUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2ps256_mask, "V4fV4LLiV4fUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2qq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2qq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq128_mask, "V2LLiV2dV2LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq256_mask, "V4LLiV4dV4LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2qq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2qq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2uqq128_mask, "V2LLiV4fV2LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2uqq256_mask, "V4LLiV4fV4LLiUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps128_mask, "V4fV2LLiV4fUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps256_mask, "V4fV4LLiV4fUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangepd128_mask, "V2dV2dV2dIiV2dUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangepd256_mask, "V4dV4dV4dIiV4dUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangeps128_mask, "V4fV4fV4fIiV4fUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangeps256_mask, "V8fV8fV8fIiV8fUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangesd128_round_mask, "V2dV2dV2dV2dUcIiIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangess128_round_mask, "V4fV4fV4fV4fUcIiIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reducepd128_mask, "V2dV2dIiV2dUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reducepd256_mask, "V4dV4dIiV4dUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reduceps128_mask, "V4fV4fIiV4fUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reduceps256_mask, "V8fV8fIiV8fUc", "nc", "avx512vl,avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reducesd_mask, "V2dV2dV2dV2dUcIiIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reducess_mask, "V4fV4fV4fV4fUcIiIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_pmovswb128_mask, "V16cV8sV16cUc", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovswb256_mask, "V16cV16sV16cUs", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb128_mask, "V16cV8sV16cUc", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb256_mask, "V16cV16sV16cUs", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovwb128_mask, "V16cV8sV16cUc", "nc", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8LLiV8fV8LLiUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2pd512_mask, "V8dV8LLiV8dUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtqq2ps512_mask, "V8fV8LLiV8fUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2qq512_mask, "V8LLiV8fV8LLiUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvttps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd512_mask, "V8dV8LLiV8dUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps512_mask, "V8fV8LLiV8fUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangepd512_mask, "V8dV8dV8dIiV8dUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_rangeps512_mask, "V16fV16fV16fIiV16fUsIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reducepd512_mask, "V8dV8dIiV8dUcIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_reduceps512_mask, "V16fV16fIiV16fUsIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_prold512, "V16iV16iIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prolq512, "V8LLiV8LLiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prold128, "V4iV4iIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prold256, "V8iV8iIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolq128, "V2LLiV2LLiIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolq256, "V4LLiV4LLiIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolvd512, "V16iV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prolvq512, "V8LLiV8LLiV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prord512, "V16iV16iIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prorq512, "V8LLiV8LLiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prolvd128, "V4iV4iV4i", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolvd256, "V8iV8iV8i", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolvq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prolvq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prord128, "V4iV4iIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prord256, "V8iV8iIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorq128, "V2LLiV2LLiIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorq256, "V4LLiV4LLiIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorvd512, "V16iV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prorvq512, "V8LLiV8LLiV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_prorvd128, "V4iV4iV4i", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorvd256, "V8iV8iV8i", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorvq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_prorvq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pshufhw512, "V32sV32sIi", "nc ", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pshuflw512, "V32sV32sIi", "nc ", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psllv32hi, "V32sV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psllw512, "V32sV32sV8s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psllwi512, "V32sV32si", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psllv16hi, "V16sV16sV16s", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psllv8hi, "V8sV8sV8s", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pslldi512, "V16iV16ii", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psllqi512, "V8LLiV8LLii", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrlv32hi, "V32sV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psrlv16hi, "V16sV16sV16s", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psrlv8hi, "V8sV8sV8s", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psrldi512, "V16iV16ii", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrlqi512, "V8LLiV8LLii", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrav32hi, "V32sV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psrav16hi, "V16sV16sV16s", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psrav8hi, "V8sV8sV8s", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psravq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psravq256, "V4LLiV4LLiV4LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psraw512, "V32sV32sV8s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pslldqi512_byteshift, "V8LLiV8LLiIi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psrldqi512_byteshift, "V8LLiV8LLiIi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_movdqa32store512_mask, "vV16i*V16iUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_movdqa64load512_mask, "V8LLiV8LLiC*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_movdqa64store512_mask, "vV8LLi*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_movdqa32store128_mask, "vV4i*V4iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa32store256_mask, "vV8i*V8iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2LLiV2LLiC*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8LLiV8LLiV8LLiV8LLi", "nc", "avx512ifma")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8LLiV8LLiV8LLiV8LLi", "nc", "avx512ifma")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2LLiV2LLiV2LLiV2LLi", "nc", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256, "V4LLiV4LLiV4LLiV4LLi", "nc", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2LLiV2LLiV2LLiV2LLi", "nc", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4LLiV4LLiV4LLiV4LLi", "nc", "avx512ifma,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cmpq128_mask, "UcV2LLiV2LLiIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cmpw128_mask, "UcV8sV8sIiUc", "ncV:128:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cmpb256_mask, "UiV32cV32cIiUi", "ncV:256:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cmpd256_mask, "UcV8iV8iIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cmpq256_mask, "UcV4LLiV4LLiIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cmpw256_mask, "UsV16sV16sIiUs", "ncV:256:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cmpb512_mask, "ULLiV64cV64cIiULLi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cmpq512_mask, "UcV8LLiV8LLiIiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ucmpb128_mask, "UsV16cV16cIiUs", "ncV:128:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ucmpd128_mask, "UcV4iV4iIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_ucmpq128_mask, "UcV2LLiV2LLiIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_ucmpw128_mask, "UcV8sV8sIiUc", "ncV:128:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ucmpb256_mask, "UiV32cV32cIiUi", "ncV:256:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ucmpd256_mask, "UcV8iV8iIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_ucmpq256_mask, "UcV4LLiV4LLiIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_ucmpw256_mask, "UsV16sV16sIiUs", "ncV:256:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ucmpb512_mask, "ULLiV64cV64cIiULLi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_ucmpd512_mask, "UsV16iV16iIiUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_ucmpq512_mask, "UcV8LLiV8LLiIiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_ucmpw512_mask, "UiV32sV32sIiUi", "ncV:512:", "avx512bw")
+
+TARGET_BUILTIN(__builtin_ia32_pabsb512, "V64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pabsw512, "V32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_packssdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_packsswb512, "V64cV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_packusdw512, "V32sV16iV16i", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_packuswb512, "V64cV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddsb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddsw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddusb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_paddusw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmaxsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmaxsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmaxub512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmaxuw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pminsb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pminsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pminub512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pminuw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pshufb512, "V64cV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubsb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubsw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubusb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psubusw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512bw")
+
+TARGET_BUILTIN(__builtin_ia32_vpconflictdi_128_mask, "V2LLiV2LLiV2LLiUc", "ncV:128:", "avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpconflictdi_256_mask, "V4LLiV4LLiV4LLiUc", "ncV:256:", "avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpconflictsi_128_mask, "V4iV4iV4iUc", "ncV:128:", "avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpconflictsi_256_mask, "V8iV8iV8iUc", "ncV:256:", "avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpconflictdi_512_mask, "V8LLiV8LLiV8LLiUc", "ncV:512:", "avx512cd")
+TARGET_BUILTIN(__builtin_ia32_vpconflictsi_512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512cd")
+TARGET_BUILTIN(__builtin_ia32_vplzcntd_512, "V16iV16i", "ncV:512:", "avx512cd")
+TARGET_BUILTIN(__builtin_ia32_vplzcntq_512, "V8LLiV8LLi", "ncV:512:", "avx512cd")
+
+TARGET_BUILTIN(__builtin_ia32_vpopcntd_128, "V4iV4i", "ncV:128:", "avx512vpopcntdq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_128, "V2LLiV2LLi", "ncV:128:", "avx512vpopcntdq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpopcntd_256, "V8iV8i", "ncV:256:", "avx512vpopcntdq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_256, "V4LLiV4LLi", "ncV:256:", "avx512vpopcntdq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "ncV:512:", "avx512vpopcntdq")
+TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "ncV:512:", "avx512vpopcntdq")
+
+TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "ncV:128:", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "ncV:128:", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "ncV:256:", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "ncV:256:", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "ncV:512:", "avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "ncV:512:", "avx512bitalg")
+
+TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "ncV:128:", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "ncV:256:", "avx512vl,avx512bitalg")
+TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "ncV:512:", "avx512bitalg")
+
+TARGET_BUILTIN(__builtin_ia32_pmulhrsw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmulhuw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmulhw512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+
+TARGET_BUILTIN(__builtin_ia32_addpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_addps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_divpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_divps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_mulpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_mulps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_subpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_subps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
+
+TARGET_BUILTIN(__builtin_ia32_pmaddubsw512, "V32sV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmaddwd512, "V16iV32sV32s", "ncV:512:", "avx512bw")
+
+TARGET_BUILTIN(__builtin_ia32_addss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_divss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_mulss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_subss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_maxss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_minss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_addsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_divsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_mulsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_subsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_maxsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_minsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+
+TARGET_BUILTIN(__builtin_ia32_compressdf128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressdf256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressdi128_mask, "V2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressdi256_mask, "V4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl")
+
+TARGET_BUILTIN(__builtin_ia32_compresshi128_mask, "V8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compresshi256_mask, "V16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressqi128_mask, "V16cV16cV16cUs", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressqi256_mask, "V32cV32cV32cUi", "ncV:256:", "avx512vl,avx512vbmi2")
+
+TARGET_BUILTIN(__builtin_ia32_compresssf128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compresssf256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compresssi128_mask, "V4iV4iV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compresssi256_mask, "V8iV8iV8iUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressstoredf128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressstoredf256_mask, "vV4d*V4dUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressstoredi128_mask, "vV2LLi*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressstoredi256_mask, "vV4LLi*V4LLiUc", "nV:256:", "avx512vl")
+
+TARGET_BUILTIN(__builtin_ia32_compressstorehi128_mask, "vV8s*V8sUc", "nV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressstorehi256_mask, "vV16s*V16sUs", "nV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressstoreqi128_mask, "vV16c*V16cUs", "nV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressstoreqi256_mask, "vV32c*V32cUi", "nV:256:", "avx512vl,avx512vbmi2")
+
+TARGET_BUILTIN(__builtin_ia32_compressstoresf128_mask, "vV4f*V4fUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressstoresf256_mask, "vV8f*V8fUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressstoresi128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_compressstoresi256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2dq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2ps_mask, "V4fV2dV4fUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2udq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2udq256_mask, "V4iV4dV4iUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtps2udq128_mask, "V4iV4fV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtps2udq256_mask, "V8iV8fV8iUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2dq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2udq128_mask, "V4iV2dV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2udq256_mask, "V4iV4dV4iUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvttps2udq128_mask, "V4iV4fV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvttps2udq256_mask, "V8iV8fV8iUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expanddf128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expanddf256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expanddi128_mask, "V2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expanddi256_mask, "V4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl")
+
+TARGET_BUILTIN(__builtin_ia32_expandhi128_mask, "V8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandhi256_mask, "V16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandqi128_mask, "V16cV16cV16cUs", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandqi256_mask, "V32cV32cV32cUi", "ncV:256:", "avx512vl,avx512vbmi2")
+
+TARGET_BUILTIN(__builtin_ia32_expandloaddf128_mask, "V2dV2dC*V2dUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expandloaddf256_mask, "V4dV4dC*V4dUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expandloaddi128_mask, "V4iV2LLiC*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expandloaddi256_mask, "V4LLiV4LLiC*V4LLiUc", "nV:256:", "avx512vl")
+
+TARGET_BUILTIN(__builtin_ia32_expandloadhi128_mask, "V8sV8sC*V8sUc", "nV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandloadhi256_mask, "V16sV16sC*V16sUs", "nV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandloadqi128_mask, "V16cV16cC*V16cUs", "nV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandloadqi256_mask, "V32cV32cC*V32cUi", "nV:256:", "avx512vl,avx512vbmi2")
+
+TARGET_BUILTIN(__builtin_ia32_expandloadsf128_mask, "V4fV4fC*V4fUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expandloadsf256_mask, "V8fV8fC*V8fUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expandloadsi128_mask, "V4iV4iC*V4iUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expandloadsi256_mask, "V8iV8iC*V8iUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expandsf128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expandsf256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expandsi128_mask, "V4iV4iV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_expandsi256_mask, "V8iV8iV8iUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_getexppd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_getexppd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_getexpps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_getexpps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pabsq128, "V2LLiV2LLi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pabsq256, "V4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmaxsq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmaxsq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmaxuq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmaxuq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminsq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminsq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminuq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pminuq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rndscalepd_128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rndscalepd_256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rndscaleps_128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rndscaleps_256_mask, "V8fV8fIiV8fUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scalefpd128_mask, "V2dV2dV2dV2dUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scalefpd256_mask, "V4dV4dV4dV4dUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scalefps128_mask, "V4fV4fV4fV4fUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scalefps256_mask, "V8fV8fV8fV8fUc", "ncV:256:", "avx512vl")
+
+TARGET_BUILTIN(__builtin_ia32_scatterdiv2df, "vd*UcV2LLiV2dIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv2di, "vLLi*UcV2LLiV2LLiIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv4df, "vd*UcV4LLiV4dIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv4di, "vLLi*UcV4LLiV4LLiIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv4sf, "vf*UcV2LLiV4fIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv4si, "vi*UcV2LLiV4iIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv8sf, "vf*UcV4LLiV4fIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scatterdiv8si, "vi*UcV4LLiV4iIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scattersiv2df, "vd*UcV4iV2dIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scattersiv2di, "vLLi*UcV4iV2LLiIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scattersiv4df, "vd*UcV4iV4dIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scattersiv4di, "vLLi*UcV4iV4LLiIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scattersiv4sf, "vf*UcV4iV4fIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scattersiv4si, "vi*UcV4iV4iIi", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scattersiv8sf, "vf*UcV8iV8fIi", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_scattersiv8si, "vi*UcV8iV8iIi", "nV:256:", "avx512vl")
+
+TARGET_BUILTIN(__builtin_ia32_vpermi2vard128, "V4iV4iV4iV4i", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2vard256, "V8iV8iV8iV8i", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2vard512, "V16iV16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varpd128, "V2dV2dV2LLiV2d", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varpd256, "V4dV4dV4LLiV4d", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varpd512, "V8dV8dV8LLiV8d", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varps128, "V4fV4fV4iV4f", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varps256, "V8fV8fV8iV8f", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varps512, "V16fV16fV16iV16f", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varqi128, "V16cV16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varqi256, "V32cV32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varqi512, "V64cV64cV64cV64c", "ncV:512:", "avx512vbmi")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128, "V8sV8sV8sV8s", "ncV:128:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256, "V16sV16sV16sV16s", "ncV:256:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_vpermi2varhi512, "V32sV32sV32sV32s", "ncV:512:", "avx512bw")
+
+TARGET_BUILTIN(__builtin_ia32_vpshldd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldd512, "V16iV16iV16iIi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldq128, "V2LLiV2LLiV2LLiIi", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldq256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldq512, "V8LLiV8LLiV8LLiIi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldw128, "V8sV8sV8sIi", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldw256, "V16sV16sV16sIi", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldw512, "V32sV32sV32sIi", "ncV:512:", "avx512vbmi2")
+
+TARGET_BUILTIN(__builtin_ia32_vpshldvd128_mask, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvd256_mask, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvd512_mask, "V16iV16iV16iV16iUs", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvw128_mask, "V8sV8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvw256_mask, "V16sV16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvd128_maskz, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvd256_maskz, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvd512_maskz, "V16iV16iV16iV16iUs", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvw128_maskz, "V8sV8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvw256_maskz, "V16sV16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshldvw512_maskz, "V32sV32sV32sV32sUi", "ncV:512:", "avx512vbmi2")
+
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_mask, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_mask, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_mask, "V16iV16iV16iV16iUs", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_mask, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_mask, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_mask, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_mask, "V8sV8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_mask, "V16sV16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_mask, "V32sV32sV32sV32sUi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd128_maskz, "V4iV4iV4iV4iUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd256_maskz, "V8iV8iV8iV8iUc", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvd512_maskz, "V16iV16iV16iV16iUs", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq128_maskz, "V2LLiV2LLiV2LLiV2LLiUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq256_maskz, "V4LLiV4LLiV4LLiV4LLiUc", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvq512_maskz, "V8LLiV8LLiV8LLiV8LLiUc", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvw128_maskz, "V8sV8sV8sV8sUc", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvw256_maskz, "V16sV16sV16sV16sUs", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdvw512_maskz, "V32sV32sV32sV32sUi", "ncV:512:", "avx512vbmi2")
+
+TARGET_BUILTIN(__builtin_ia32_vpshrdd128, "V4iV4iV4iIi", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdd256, "V8iV8iV8iIi", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdd512, "V16iV16iV16iIi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq128, "V2LLiV2LLiV2LLiIi", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdq512, "V8LLiV8LLiV8LLiIi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdw128, "V8sV8sV8sIi", "ncV:128:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdw256, "V16sV16sV16sIi", "ncV:256:", "avx512vl,avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_vpshrdw512, "V32sV32sV32sIi", "ncV:512:", "avx512vbmi2")
+
+TARGET_BUILTIN(__builtin_ia32_pmovswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovuswb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovwb512_mask, "V32cV32sV32cUi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2qq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2qq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtps2qq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtps2qq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtps2uqq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtps2uqq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtqq2ps128_mask, "V4fV2LLiV4fUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtqq2ps256_mask, "V4fV4LLiV4fUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2qq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2qq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq128_mask, "V2LLiV2dV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq256_mask, "V4LLiV4dV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttps2qq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttps2qq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttps2uqq128_mask, "V2LLiV4fV2LLiUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttps2uqq256_mask, "V4LLiV4fV4LLiUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps128_mask, "V4fV2LLiV4fUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps256_mask, "V4fV4LLiV4fUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_rangepd128_mask, "V2dV2dV2dIiV2dUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_rangepd256_mask, "V4dV4dV4dIiV4dUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_rangeps128_mask, "V4fV4fV4fIiV4fUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_rangeps256_mask, "V8fV8fV8fIiV8fUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_rangesd128_round_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_rangess128_round_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_reducepd128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_reducepd256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_reduceps128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_reduceps256_mask, "V8fV8fIiV8fUc", "ncV:256:", "avx512vl,avx512dq")
+TARGET_BUILTIN(__builtin_ia32_reducesd_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_reducess_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_pmovswb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovswb256_mask, "V16cV16sV16cUs", "ncV:256:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovuswb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovuswb256_mask, "V16cV16sV16cUs", "ncV:256:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovwb128_mask, "V16cV8sV16cUc", "ncV:128:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtps2qq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtqq2pd512_mask, "V8dV8LLiV8dUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtqq2ps512_mask, "V8fV8LLiV8fUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2qq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttpd2uqq512_mask, "V8LLiV8dV8LLiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttps2qq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvttps2uqq512_mask, "V8LLiV8fV8LLiUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtuqq2pd512_mask, "V8dV8LLiV8dUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtuqq2ps512_mask, "V8fV8LLiV8fUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_rangepd512_mask, "V8dV8dV8dIiV8dUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_rangeps512_mask, "V16fV16fV16fIiV16fUsIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_reducepd512_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_reduceps512_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_prold512, "V16iV16iIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prolq512, "V8LLiV8LLiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prold128, "V4iV4iIi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prold256, "V8iV8iIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prolq128, "V2LLiV2LLiIi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prolq256, "V4LLiV4LLiIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prolvd512, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prolvq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prord512, "V16iV16iIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prorq512, "V8LLiV8LLiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prolvd128, "V4iV4iV4i", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prolvd256, "V8iV8iV8i", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prolvq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prolvq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prord128, "V4iV4iIi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prord256, "V8iV8iIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prorq128, "V2LLiV2LLiIi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prorq256, "V4LLiV4LLiIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prorvd512, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prorvq512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_prorvd128, "V4iV4iV4i", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prorvd256, "V8iV8iV8i", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prorvq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_prorvq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pshufhw512, "V32sV32sIi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pshuflw512, "V32sV32sIi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psllv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psllw512, "V32sV32sV8s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psllwi512, "V32sV32si", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psllv16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psllv8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pslldi512, "V16iV16ii", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psllqi512, "V8LLiV8LLii", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrlv32hi, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psrlv16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psrlv8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psrldi512, "V16iV16ii", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrlqi512, "V8LLiV8LLii", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrav32hi, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psrav16hi, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psrav8hi, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psravq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psravq256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psraw512, "V32sV32sV8s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psrawi512, "V32sV32si", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psrlw512, "V32sV32sV8s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psrlwi512, "V32sV32si", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pslldqi512_byteshift, "V8LLiV8LLiIi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psrldqi512_byteshift, "V8LLiV8LLiIi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_movdqa32load128_mask, "V4iV4i*V4iUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa32load256_mask, "V8iV8i*V8iUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa32load512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_movdqa32store512_mask, "vV16i*V16iUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_movdqa64load512_mask, "V8LLiV8LLiC*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_movdqa64store512_mask, "vV8LLi*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_movdqa32store128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa32store256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa64load128_mask, "V2LLiV2LLiC*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa64load256_mask, "V4LLiV4LLiC*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa64store128_mask, "vV2LLi*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_movdqa64store256_mask, "vV4LLi*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52huq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512ifma")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52luq512, "V8LLiV8LLiV8LLiV8LLi", "ncV:512:", "avx512ifma")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52huq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512ifma,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52huq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512ifma,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52luq128, "V2LLiV2LLiV2LLiV2LLi", "ncV:128:", "avx512ifma,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmadd52luq256, "V4LLiV4LLiV4LLiV4LLi", "ncV:256:", "avx512ifma,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcomisd, "iV2dV2dIiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcomiss, "iV4fV4fIiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kunpckdi, "ULLiULLiULLi", "nc", "avx512bw")
 TARGET_BUILTIN(__builtin_ia32_kunpcksi, "UiUiUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32s*V32sUi", "n", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64c*V64cULLi", "n", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8LLiIiUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8LLiIiUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps512_mask, "V16fV16fV16fV16iIiUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps512_maskz, "V16fV16fV16fV16iIiUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmsd_mask, "V2dV2dV2dV2LLiIiUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmsd_maskz, "V2dV2dV2dV2LLiIiUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmss_mask, "V4fV4fV4fV4iIiUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_fixupimmss_maskz, "V4fV4fV4fV4iIiUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getexpsd128_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getexpss128_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getmantsd_round_mask, "V2dV2dV2dIiV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getmantss_round_mask, "V4fV4fV4fIiV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loaddquhi128_mask, "V8sV8s*V8sUc", "n", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddquhi256_mask, "V16sV16s*V16sUs", "n", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddquqi128_mask, "V16cV16c*V16cUs", "n", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddquqi256_mask, "V32cV32c*V32cUi", "n", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_mask, "V2dV2dV2dV2LLiIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_maskz, "V2dV2dV2dV2LLiIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_mask, "V4dV4dV4dV4LLiIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_maskz, "V4dV4dV4dV4LLiIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps128_mask, "V4fV4fV4fV4iIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps128_maskz, "V4fV4fV4fV4iIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V2dV2d*V2dUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V4fV4f*V4fUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddqusi128_mask, "V4iV4i*V4iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loaddqusi256_mask, "V8iV8i*V8iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadupd128_mask, "V2dV2d*V2dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4d*V4dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4f*V4fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_loadups256_mask, "V8fV8f*V8fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storedquhi512_mask, "vV32s*V32sUi", "n", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_storedquqi512_mask, "vV64c*V64cULLi", "n", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_storedquhi128_mask, "vV8s*V8sUc", "n", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_storedquhi256_mask, "vV16s*V16sUs", "n", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs", "n", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi", "n", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV2d*V2dUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV4f*V4fUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storedqusi128_mask, "vV4i*V4iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storedqusi256_mask, "vV8i*V8iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storeupd128_mask, "vV2d*V2dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storeupd256_mask, "vV4d*V4dUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storeups128_mask, "vV4f*V4fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_storeups256_mask, "vV8f*V8fUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rcp14pd128_mask, "V2dV2dV2dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rcp14pd256_mask, "V4dV4dV4dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rcp14ps128_mask, "V4fV4fV4fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rcp14ps256_mask, "V8fV8fV8fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntd_128, "V4iV4i", "nc", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntd_256, "V8iV8i", "nc", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntq_128, "V2LLiV2LLi", "nc", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vplzcntq_256, "V4LLiV4LLi", "nc", "avx512cd,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtsd2si32, "iV2dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi32, "UiV2dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtss2si32, "iV4fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtss2usi32, "UiV4fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttsd2si32, "iV2dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi32, "UiV2dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttss2si32, "iV4fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttss2usi32, "UiV4fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermilpd512, "V8dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermilps512, "V16fV16fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512, "V8dV8dV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpermilvarps512, "V16fV16fV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rndscalesd_round_mask, "V2dV2dV2dV2dUcIiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rndscaless_round_mask, "V4fV4fV4fV4fUcIiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scalefpd512_mask, "V8dV8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scalefps512_mask, "V16fV16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scalefsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_scalefss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psradi512, "V16iV16ii", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psraqi512, "V8LLiV8LLii", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psraq128, "V2LLiV2LLiV2LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psraq256, "V4LLiV4LLiV2LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psraqi128, "V2LLiV2LLii", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_psraqi256, "V4LLiV4LLii", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pslld512, "V16iV16iV4i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psllq512, "V8LLiV8LLiV2LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psllv16si, "V16iV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psllv8di, "V8LLiV8LLiV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrad512, "V16iV16iV4i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psraq512, "V8LLiV8LLiV2LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrav16si, "V16iV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrav8di, "V8LLiV8LLiV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrld512, "V16iV16iV4i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrlq512, "V8LLiV8LLiV2LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrlv16si, "V16iV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_psrlv8di, "V8LLiV8LLiV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pternlogd512_mask, "V16iV16iV16iV16iIiUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pternlogd512_maskz, "V16iV16iV16iV16iIiUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pternlogq512_mask, "V8LLiV8LLiV8LLiV8LLiIiUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pternlogq512_maskz, "V8LLiV8LLiV8LLiV8LLiIiUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pternlogd128_mask, "V4iV4iV4iV4iIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogd128_maskz, "V4iV4iV4iV4iIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogd256_mask, "V8iV8iV8iV8iIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogd256_maskz, "V8iV8iV8iV8iIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogq128_mask, "V2LLiV2LLiV2LLiV2LLiIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogq128_maskz, "V2LLiV2LLiV2LLiV2LLiIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogq256_mask, "V4LLiV4LLiV4LLiV4LLiIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pternlogq256_maskz, "V4LLiV4LLiV4LLiV4LLiIiUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_f32x4, "V16fV16fV16fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_shuf_f64x2, "V8dV8dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_shuf_i32x4, "V16iV16iV16iIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_shuf_i64x2, "V8LLiV8LLiV8LLiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_shufpd512, "V8dV8dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_shufps512, "V16fV16fV16fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256, "V8fV8fV8fIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256, "V4dV4dV4dIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256, "V8iV8iV8iIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_sqrtsd_round_mask, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_sqrtss_round_mask, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14pd128_mask, "V2dV2dV2dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14pd256_mask, "V4dV4dV4dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14ps128_mask, "V4fV4fV4fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_rsqrt14ps256_mask, "V8fV8fV8fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtb2mask512, "ULLiV64c", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2b512, "V64cULLi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2w512, "V32sUi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtd2mask512, "UsV16i", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2d512, "V16iUs", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2q512, "V8LLiUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtq2mask512, "UcV8LLi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_cvtb2mask128, "UsV16c", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtb2mask256, "UiV32c", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2b128, "V16cUs", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2b256, "V32cUi", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2w128, "V8sUc", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2w256, "V16sUs", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtd2mask128, "UcV4i", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtd2mask256, "UcV8i", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2d128, "V4iUc", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2d256, "V8iUc", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2q128, "V2LLiUc", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtmask2q256, "V4LLiUc", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtq2mask128, "UcV2LLi", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4LLi", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi", "n", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw512_mask, "V16sV16iV16sUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw512mem_mask, "vV16s*V16iUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd512_mask, "V8iV8LLiV8iUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb128_mask, "V16cV4iV16cUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb128mem_mask, "vV16c*V4iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovswb128mem_mask, "vV16c*V8sUc", "n", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb256_mask, "V16cV8iV16cUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsdb256mem_mask, "vV16c*V8iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovswb256mem_mask, "vV16c*V16sUs", "n", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw128_mask, "V8sV4iV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb128_mask, "V16cV2LLiV16cUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb128mem_mask, "vV16c*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb256_mask, "V16cV4LLiV16cUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqb256mem_mask, "vV16c*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd128_mask, "V4iV2LLiV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd128mem_mask, "vV4i*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd256_mask, "V4iV4LLiV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqd256mem_mask, "vV4i*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw128_mask, "V8sV2LLiV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovsqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb512_mask, "V16cV16iV16cUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb512mem_mask, "vV16c*V16iUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb512mem_mask, "vV32c*V32sUi", "n", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw512_mask, "V16sV16iV16sUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw512mem_mask, "vV16s*V16iUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd512_mask, "V8iV8LLiV8iUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb128_mask, "V16cV4iV16cUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb128mem_mask, "vV16c*V4iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb128mem_mask, "vV16c*V8sUc", "n", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb256_mask, "V16cV8iV16cUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusdb256mem_mask, "vV16c*V8iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovuswb256mem_mask, "vV16c*V16sUs", "n", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw128_mask, "V8sV4iV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2LLiV16cUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb128mem_mask, "vV16c*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb256_mask, "V16cV4LLiV16cUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqb256mem_mask, "vV16c*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd128_mask, "V4iV2LLiV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd128mem_mask, "vV4i*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd256_mask, "V4iV4LLiV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqd256mem_mask, "vV4i*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw128_mask, "V8sV2LLiV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "n", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8LLiV8iUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc", "n", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovdb128mem_mask, "vV16c*V4iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdb256_mask, "V16cV8iV16cUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdb256mem_mask, "vV16c*V8iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovwb256mem_mask, "vV16c*V16sUs", "n", "avx512vl,avx512bw")
-TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovdw256mem_mask, "vV8s*V8iUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2LLiV16cUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, "vV16c*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqb256_mask, "V16cV4LLiV16cUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqb256mem_mask, "vV16c*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqd128_mask, "V4iV2LLiV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqd128mem_mask, "vV4i*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqd256mem_mask, "vV4i*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2LLiV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc", "n", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2LLiV8LLiIiV2LLiUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4LLiV8LLiIiV4LLiUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2LLiV4LLiIiV2LLiUc", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_insertf32x8, "V16fV16fV8fIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_insertf64x2_512, "V8dV8dV2dIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_inserti32x8, "V16iV16iV8iIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_inserti64x2_512, "V8LLiV8LLiV2LLiIi", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_insertf64x4, "V8dV8dV4dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_inserti64x4, "V8LLiV8LLiV4LLiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_insertf64x2_256, "V4dV4dV2dIi", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_inserti64x2_256, "V4LLiV4LLiV2LLiIi", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_insertf32x4_256, "V8fV8fV4fIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_inserti32x4_256, "V8iV8iV4iIi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_insertf32x4, "V16fV16fV4fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_inserti32x4, "V16iV16iV4iIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getmantpd128_mask, "V2dV2dIiV2dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getmantpd256_mask, "V4dV4dIiV4dUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getmantps128_mask, "V4fV4fIiV4fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getmantps256_mask, "V8fV8fIiV8fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_getmantpd512_mask, "V8dV8dIiV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getmantps512_mask, "V16fV16fIiV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getexppd512_mask, "V8dV8dV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_getexpps512_mask, "V16fV16fV16fUsIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask,  "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss3_maskz, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask3, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask,  "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_maskz, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask3, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vfnmsubss3_mask3, "V4fV4fV4fV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permdf512, "V8dV8dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permdi512, "V8LLiV8LLiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvarhi512, "V32sV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8LLiV8LLiV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvarsf512, "V16fV16fV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvarsi512, "V16iV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_permvarqi512, "V64cV64cV64c", "nc", "avx512vbmi")
-TARGET_BUILTIN(__builtin_ia32_permvarqi128, "V16cV16cV16c", "nc", "avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarqi256, "V32cV32cV32c", "nc", "avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarhi128, "V8sV8sV8s", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvarhi256, "V16sV16sV16s", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvardf256, "V4dV4dV4LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_permvardi256, "V4LLiV4LLiV4LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclassps256_mask, "UcV8fIiUc", "nc", "avx512dq,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "nc", "avx512dq")
-TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "nc", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_loaddquhi512_mask, "V32sV32s*V32sUi", "nV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_loaddquqi512_mask, "V64cV64c*V64cULLi", "nV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_mask, "V8dV8dV8dV8LLiIiUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd512_maskz, "V8dV8dV8dV8LLiIiUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_fixupimmps512_mask, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_fixupimmps512_maskz, "V16fV16fV16fV16iIiUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_fixupimmsd_mask, "V2dV2dV2dV2LLiIiUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_fixupimmsd_maskz, "V2dV2dV2dV2LLiIiUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_fixupimmss_mask, "V4fV4fV4fV4iIiUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_fixupimmss_maskz, "V4fV4fV4fV4iIiUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_getexpsd128_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_getexpss128_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_getmantsd_round_mask, "V2dV2dV2dIiV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_getmantss_round_mask, "V4fV4fV4fIiV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loaddquhi128_mask, "V8sV8s*V8sUc", "nV:128:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loaddquhi256_mask, "V16sV16s*V16sUs", "nV:256:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loaddquqi128_mask, "V16cV16c*V16cUs", "nV:128:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loaddquqi256_mask, "V32cV32c*V32cUi", "nV:256:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_mask, "V2dV2dV2dV2LLiIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd128_maskz, "V2dV2dV2dV2LLiIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_mask, "V4dV4dV4dV4LLiIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fixupimmpd256_maskz, "V4dV4dV4dV4LLiIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fixupimmps128_mask, "V4fV4fV4fV4iIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fixupimmps128_maskz, "V4fV4fV4fV4iIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fixupimmps256_mask, "V8fV8fV8fV8iIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fixupimmps256_maskz, "V8fV8fV8fV8iIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loadapd128_mask, "V2dV2d*V2dUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loadsd128_mask, "V2dV2d*V2dUc", "nV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loadapd256_mask, "V4dV4d*V4dUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loadaps128_mask, "V4fV4f*V4fUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loadss128_mask, "V4fV4f*V4fUc", "nV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_loadaps256_mask, "V8fV8f*V8fUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loaddqudi128_mask, "V2LLiV2LLi*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loaddqudi256_mask, "V4LLiV4LLi*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loaddqusi128_mask, "V4iV4i*V4iUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loaddqusi256_mask, "V8iV8i*V8iUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loadupd128_mask, "V2dV2d*V2dUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loadupd256_mask, "V4dV4d*V4dUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loadups128_mask, "V4fV4f*V4fUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_loadups256_mask, "V8fV8f*V8fUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storedquhi512_mask, "vV32s*V32sUi", "nV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_storedquqi512_mask, "vV64c*V64cULLi", "nV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_storedquhi128_mask, "vV8s*V8sUc", "nV:128:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_storedquhi256_mask, "vV16s*V16sUs", "nV:256:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_storedquqi128_mask, "vV16c*V16cUs", "nV:128:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_storedquqi256_mask, "vV32c*V32cUi", "nV:256:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_storeapd128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storesd128_mask, "vV2d*V2dUc", "nV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_storeapd256_mask, "vV4d*V4dUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storeaps128_mask, "vV4f*V4fUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storess128_mask, "vV4f*V4fUc", "nV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_storeaps256_mask, "vV8f*V8fUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storedqudi128_mask, "vV2LLi*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storedqudi256_mask, "vV4LLi*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storedqusi128_mask, "vV4i*V4iUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storedqusi256_mask, "vV8i*V8iUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storeupd128_mask, "vV2d*V2dUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storeupd256_mask, "vV4d*V4dUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storeups128_mask, "vV4f*V4fUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_storeups256_mask, "vV8f*V8fUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rcp14pd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rcp14pd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rcp14ps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rcp14ps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vplzcntd_128, "V4iV4i", "ncV:128:", "avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vplzcntd_256, "V8iV8i", "ncV:256:", "avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vplzcntq_128, "V2LLiV2LLi", "ncV:128:", "avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vplzcntq_256, "V4LLiV4LLi", "ncV:256:", "avx512cd,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtsd2si32, "iV2dIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi32, "UiV2dIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtss2si32, "iV4fIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtss2usi32, "UiV4fIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2si32, "iV2dIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi32, "UiV2dIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2si32, "iV4fIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2usi32, "UiV4fIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermilpd512, "V8dV8dIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermilps512, "V16fV16fIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermilvarpd512, "V8dV8dV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpermilvarps512, "V16fV16fV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rndscalesd_round_mask, "V2dV2dV2dV2dUcIiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rndscaless_round_mask, "V4fV4fV4fV4fUcIiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scalefpd512_mask, "V8dV8dV8dV8dUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scalefps512_mask, "V16fV16fV16fV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scalefsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_scalefss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psradi512, "V16iV16ii", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psraqi512, "V8LLiV8LLii", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psraq128, "V2LLiV2LLiV2LLi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psraq256, "V4LLiV4LLiV2LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psraqi128, "V2LLiV2LLii", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_psraqi256, "V4LLiV4LLii", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pslld512, "V16iV16iV4i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psllq512, "V8LLiV8LLiV2LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psllv16si, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psllv8di, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrad512, "V16iV16iV4i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psraq512, "V8LLiV8LLiV2LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrav16si, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrav8di, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrld512, "V16iV16iV4i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrlq512, "V8LLiV8LLiV2LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrlv16si, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_psrlv8di, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pternlogd512_mask, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pternlogd512_maskz, "V16iV16iV16iV16iIiUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pternlogq512_mask, "V8LLiV8LLiV8LLiV8LLiIiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pternlogq512_maskz, "V8LLiV8LLiV8LLiV8LLiIiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pternlogd128_mask, "V4iV4iV4iV4iIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pternlogd128_maskz, "V4iV4iV4iV4iIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pternlogd256_mask, "V8iV8iV8iV8iIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pternlogd256_maskz, "V8iV8iV8iV8iIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pternlogq128_mask, "V2LLiV2LLiV2LLiV2LLiIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pternlogq128_maskz, "V2LLiV2LLiV2LLiV2LLiIiUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pternlogq256_mask, "V4LLiV4LLiV4LLiV4LLiIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pternlogq256_maskz, "V4LLiV4LLiV4LLiV4LLiIiUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_shuf_f32x4, "V16fV16fV16fIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_shuf_f64x2, "V8dV8dV8dIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_shuf_i32x4, "V16iV16iV16iIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_shuf_i64x2, "V8LLiV8LLiV8LLiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_shufpd512, "V8dV8dV8dIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_shufps512, "V16fV16fV16fIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256, "V8fV8fV8fIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256, "V4dV4dV4dIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256, "V8iV8iV8iIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_shuf_i64x2_256, "V4LLiV4LLiV4LLiIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_sqrtsd_round_mask, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_sqrtss_round_mask, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_rsqrt14pd128_mask, "V2dV2dV2dUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rsqrt14pd256_mask, "V4dV4dV4dUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rsqrt14ps128_mask, "V4fV4fV4fUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_rsqrt14ps256_mask, "V8fV8fV8fUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtb2mask512, "ULLiV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2b512, "V64cULLi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2w512, "V32sUi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cvtd2mask512, "UsV16i", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2d512, "V16iUs", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2q512, "V8LLiUc", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtq2mask512, "UcV8LLi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_cvtb2mask128, "UsV16c", "ncV:128:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtb2mask256, "UiV32c", "ncV:256:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2b128, "V16cUs", "ncV:128:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2b256, "V32cUi", "ncV:256:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2w128, "V8sUc", "ncV:128:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2w256, "V16sUs", "ncV:256:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtd2mask128, "UcV4i", "ncV:128:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtd2mask256, "UcV8i", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2d128, "V4iUc", "ncV:128:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2d256, "V8iUc", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2q128, "V2LLiUc", "ncV:128:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtmask2q256, "V4LLiUc", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtq2mask128, "UcV2LLi", "ncV:128:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtq2mask256, "UcV4LLi", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovsdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb512_mask, "V16cV8LLiV16cUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb512mem_mask, "vV16c*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd512_mask, "V8iV8LLiV8iUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd512mem_mask, "vV8i*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw512_mask, "V8sV8LLiV8sUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw512mem_mask, "vV8s*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovsdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovswb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovsdb256_mask, "V16cV8iV16cUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsdb256mem_mask, "vV16c*V8iUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovswb256mem_mask, "vV16c*V16sUs", "nV:256:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovsdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb128_mask, "V16cV2LLiV16cUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb128mem_mask, "vV16c*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb256_mask, "V16cV4LLiV16cUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqb256mem_mask, "vV16c*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd128_mask, "V4iV2LLiV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd128mem_mask, "vV4i*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd256_mask, "V4iV4LLiV4iUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqd256mem_mask, "vV4i*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw128_mask, "V8sV2LLiV8sUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw128mem_mask, "vV8s*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw256_mask, "V8sV4LLiV8sUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovsqw256mem_mask, "vV8s*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovuswb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovusdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb512_mask, "V16cV8LLiV16cUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb512mem_mask, "vV16c*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd512_mask, "V8iV8LLiV8iUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd512mem_mask, "vV8i*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw512_mask, "V8sV8LLiV8sUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw512mem_mask, "vV8s*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovusdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovuswb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovusdb256_mask, "V16cV8iV16cUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusdb256mem_mask, "vV16c*V8iUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovuswb256mem_mask, "vV16c*V16sUs", "nV:256:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovusdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb128_mask, "V16cV2LLiV16cUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb128mem_mask, "vV16c*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb256_mask, "V16cV4LLiV16cUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqb256mem_mask, "vV16c*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd128_mask, "V4iV2LLiV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd128mem_mask, "vV4i*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd256_mask, "V4iV4LLiV4iUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqd256mem_mask, "vV4i*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw128_mask, "V8sV2LLiV8sUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw128mem_mask, "vV8s*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw256_mask, "V8sV4LLiV8sUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovusqw256mem_mask, "vV8s*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovdb512_mask, "V16cV16iV16cUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovdb512mem_mask, "vV16c*V16iUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovwb512mem_mask, "vV32c*V32sUi", "nV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovdw512_mask, "V16sV16iV16sUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovdw512mem_mask, "vV16s*V16iUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqb512_mask, "V16cV8LLiV16cUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqb512mem_mask, "vV16c*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqd512_mask, "V8iV8LLiV8iUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqd512mem_mask, "vV8i*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqw512_mask, "V8sV8LLiV8sUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovqw512mem_mask, "vV8s*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pmovdb128_mask, "V16cV4iV16cUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovwb128mem_mask, "vV16c*V8sUc", "nV:128:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovdb128mem_mask, "vV16c*V4iUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovdb256_mask, "V16cV8iV16cUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovdb256mem_mask, "vV16c*V8iUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovwb256mem_mask, "vV16c*V16sUs", "nV:256:", "avx512vl,avx512bw")
+TARGET_BUILTIN(__builtin_ia32_pmovdw128_mask, "V8sV4iV8sUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovdw128mem_mask, "vV8s*V4iUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovdw256_mask, "V8sV8iV8sUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovdw256mem_mask, "vV8s*V8iUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqb128_mask, "V16cV2LLiV16cUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqb128mem_mask, "vV16c*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqb256_mask, "V16cV4LLiV16cUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqb256mem_mask, "vV16c*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqd128_mask, "V4iV2LLiV4iUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqd128mem_mask, "vV4i*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqd256mem_mask, "vV4i*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqw128_mask, "V8sV2LLiV8sUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqw128mem_mask, "vV8s*V2LLiUc", "nV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqw256_mask, "V8sV4LLiV8sUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_pmovqw256mem_mask, "vV8s*V4LLiUc", "nV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extractf32x8_mask, "V8fV16fIiV8fUc", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extractf64x2_512_mask, "V2dV8dIiV2dUc", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extracti32x8_mask, "V8iV16iIiV8iUc", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extracti64x2_512_mask, "V2LLiV8LLiIiV2LLiUc", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_extracti32x4_mask, "V4iV16iIiV4iUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_extracti64x4_mask, "V4LLiV8LLiIiV4LLiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_extractf64x2_256_mask, "V2dV4dIiV2dUc", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extracti64x2_256_mask, "V2LLiV4LLiIiV2LLiUc", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extractf32x4_256_mask, "V4fV8fIiV4fUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_extracti32x4_256_mask, "V4iV8iIiV4iUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_insertf32x8, "V16fV16fV8fIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_insertf64x2_512, "V8dV8dV2dIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_inserti32x8, "V16iV16iV8iIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_inserti64x2_512, "V8LLiV8LLiV2LLiIi", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_insertf64x4, "V8dV8dV4dIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_inserti64x4, "V8LLiV8LLiV4LLiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_insertf64x2_256, "V4dV4dV2dIi", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_inserti64x2_256, "V4LLiV4LLiV2LLiIi", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_insertf32x4_256, "V8fV8fV4fIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_inserti32x4_256, "V8iV8iV4iIi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_insertf32x4, "V16fV16fV4fIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_inserti32x4, "V16iV16iV4iIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_getmantpd128_mask, "V2dV2dIiV2dUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_getmantpd256_mask, "V4dV4dIiV4dUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_getmantps128_mask, "V4fV4fIiV4fUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_getmantps256_mask, "V8fV8fIiV8fUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_getmantpd512_mask, "V8dV8dIiV8dUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_getmantps512_mask, "V16fV16fIiV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_getexppd512_mask, "V8dV8dV8dUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_getexpps512_mask, "V16fV16fV16fUsIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask,  "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss3_maskz, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask,  "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_maskz, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfmsubss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfnmsubsd3_mask3, "V2dV2dV2dV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vfnmsubss3_mask3, "V4fV4fV4fV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permdf512, "V8dV8dIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permdi512, "V8LLiV8LLiIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvarhi512, "V32sV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_permvardf512, "V8dV8dV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvardi512, "V8LLiV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvarsf512, "V16fV16fV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvarsi512, "V16iV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_permvarqi512, "V64cV64cV64c", "ncV:512:", "avx512vbmi")
+TARGET_BUILTIN(__builtin_ia32_permvarqi128, "V16cV16cV16c", "ncV:128:", "avx512vbmi,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvarqi256, "V32cV32cV32c", "ncV:256:", "avx512vbmi,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvarhi128, "V8sV8sV8s", "ncV:128:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvarhi256, "V16sV16sV16s", "ncV:256:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvardf256, "V4dV4dV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_permvardi256, "V4LLiV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fpclasspd128_mask, "UcV2dIiUc", "ncV:128:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fpclasspd256_mask, "UcV4dIiUc", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fpclassps128_mask, "UcV4fIiUc", "ncV:128:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fpclassps256_mask, "UcV8fIiUc", "ncV:256:", "avx512dq,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_fpclassps512_mask, "UsV16fIiUs", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_fpclasspd512_mask, "UcV8dIiUc", "ncV:512:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_fpclasssd_mask, "UcV2dIiUc", "ncV:128:", "avx512dq")
+TARGET_BUILTIN(__builtin_ia32_fpclassss_mask, "UcV4fIiUc", "ncV:128:", "avx512dq")
 TARGET_BUILTIN(__builtin_ia32_kandhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kandnhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_korhi, "UsUsUs", "nc", "avx512f")
@@ -1744,73 +1744,73 @@
 TARGET_BUILTIN(__builtin_ia32_kunpckhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxnorhi, "UsUsUs", "nc", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_kxorhi, "UsUsUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_dbpsadbw512, "V32sV64cV64cIi", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8LLiV64cV64c", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8LLiV8LLiV8LLiUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cULLi", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_pshufd512, "V16iV16iIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cULLi", "nc", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8LLiV8LLiC*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi", "n", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cULLi", "n", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi", "n", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cULLi", "n", "avx512vbmi2")
-TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs", "n", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ps_mask, "V4fV8sV4fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256_mask, "V8fV8sV8fUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ph_mask, "V8sV4fIiV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256_mask, "V8sV8fIiV8sUc", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtw2mask512, "UiV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_cvtw2mask128, "UcV8s", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtw2mask256, "UsV16s", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi", "nc", "avx512vbmi")
-TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs", "nc", "avx512vbmi,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256_mask, "V32cV32cV32cV32cUi", "nc", "avx512vbmi,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_palignr512, "V64cV64cV64cIi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_dbpsadbw128, "V8sV16cV16cIi", "ncV:128:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dbpsadbw256, "V16sV32cV32cIi", "ncV:256:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_dbpsadbw512, "V32sV64cV64cIi", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_psadbw512, "V8LLiV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_compressdf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_compressdi512_mask, "V8LLiV8LLiV8LLiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_compresshi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressqi512_mask, "V64cV64cV64cULLi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compresssf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_compresssi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cmpsd_mask, "UcV2dV2dIiUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cmpss_mask, "UcV4fV4fIiUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_pshufd512, "V16iV16iIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_expanddf512_mask, "V8dV8dV8dUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_expanddi512_mask, "V8LLiV8LLiV8LLiUc", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandhi512_mask, "V32sV32sV32sUi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandqi512_mask, "V64cV64cV64cULLi", "ncV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandloaddf512_mask, "V8dV8dC*V8dUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandloaddi512_mask, "V8LLiV8LLiC*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandloadhi512_mask, "V32sV32sC*V32sUi", "nV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandloadqi512_mask, "V64cV64cC*V64cULLi", "nV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_expandloadsf512_mask, "V16fV16fC*V16fUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandloadsi512_mask, "V16iV16iC*V16iUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandsf512_mask, "V16fV16fV16fUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_expandsi512_mask, "V16iV16iV16iUs", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtps2pd512_mask, "V8dV8fV8dUcIi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_compressstoredf512_mask, "vV8d*V8dUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_compressstoredi512_mask, "vV8LLi*V8LLiUc", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_compressstorehi512_mask, "vV32s*V32sUi", "nV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressstoreqi512_mask, "vV64c*V64cULLi", "nV:512:", "avx512vbmi2")
+TARGET_BUILTIN(__builtin_ia32_compressstoresf512_mask, "vV16f*V16fUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_compressstoresi512_mask, "vV16i*V16iUs", "nV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ps_mask, "V4fV8sV4fUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2ps256_mask, "V8fV8sV8fUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ph_mask, "V8sV4fIiV8sUc", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2ph256_mask, "V8sV8fIiV8sUc", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtw2mask512, "UiV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_cvtw2mask128, "UcV8s", "ncV:128:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtw2mask256, "UsV16s", "ncV:256:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_cvtsd2ss_round_mask, "V4fV4fV2dV4fUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtsi2ss32, "V4fV4fiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtss2sd_round_mask, "V2dV2dV4fV2dUcIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtusi2ss32, "V4fV4fUiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb512_mask, "V64cV64cV64cV64cULLi", "ncV:512:", "avx512vbmi")
+TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb128_mask, "V16cV16cV16cV16cUs", "ncV:128:", "avx512vbmi,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_vpmultishiftqb256_mask, "V32cV32cV32cV32cUi", "ncV:256:", "avx512vbmi,avx512vl")
 
 // generic select intrinsics
-TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectb_512, "V64cULLiV64cV64c", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_selectw_128, "V8sUcV8sV8s", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectw_256, "V16sUsV16sV16s", "nc", "avx512bw,avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectw_512, "V32sUiV32sV32s", "nc", "avx512bw")
-TARGET_BUILTIN(__builtin_ia32_selectd_128, "V4iUcV4iV4i", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectd_256, "V8iUcV8iV8i", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectd_512, "V16iUsV16iV16i", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_selectq_128, "V2LLiUcV2LLiV2LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectq_256, "V4LLiUcV4LLiV4LLi", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectq_512, "V8LLiUcV8LLiV8LLi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_selectps_128, "V4fUcV4fV4f", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectps_256, "V8fUcV8fV8f", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectps_512, "V16fUsV16fV16f", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_selectpd_128, "V2dUcV2dV2d", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectpd_256, "V4dUcV4dV4d", "nc", "avx512vl")
-TARGET_BUILTIN(__builtin_ia32_selectpd_512, "V8dUcV8dV8d", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_selectb_128, "V16cUsV16cV16c", "ncV:128:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectb_256, "V32cUiV32cV32c", "ncV:256:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectb_512, "V64cULLiV64cV64c", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_selectw_128, "V8sUcV8sV8s", "ncV:128:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectw_256, "V16sUsV16sV16s", "ncV:256:", "avx512bw,avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectw_512, "V32sUiV32sV32s", "ncV:512:", "avx512bw")
+TARGET_BUILTIN(__builtin_ia32_selectd_128, "V4iUcV4iV4i", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectd_256, "V8iUcV8iV8i", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectd_512, "V16iUsV16iV16i", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_selectq_128, "V2LLiUcV2LLiV2LLi", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectq_256, "V4LLiUcV4LLiV4LLi", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectq_512, "V8LLiUcV8LLiV8LLi", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_selectps_128, "V4fUcV4fV4f", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectps_256, "V8fUcV8fV8f", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectps_512, "V16fUsV16fV16f", "ncV:512:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_selectpd_128, "V2dUcV2dV2d", "ncV:128:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectpd_256, "V4dUcV4dV4d", "ncV:256:", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_selectpd_512, "V8dUcV8dV8d", "ncV:512:", "avx512f")
 
 // MONITORX/MWAITX
 TARGET_BUILTIN(__builtin_ia32_monitorx, "vv*UiUi", "n", "mwaitx")
Index: include/clang/Basic/BuiltinsX86_64.def
===================================================================
--- include/clang/Basic/BuiltinsX86_64.def
+++ include/clang/Basic/BuiltinsX86_64.def
@@ -44,16 +44,16 @@
 
 TARGET_BUILTIN(__builtin_ia32_readeflags_u64, "ULLi", "n", "")
 TARGET_BUILTIN(__builtin_ia32_writeeflags_u64, "vULLi", "n", "")
-TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "nc", "sse")
-TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvtss2si64, "LLiV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cvttss2si64, "LLiV4f", "ncV:128:", "sse")
+TARGET_BUILTIN(__builtin_ia32_cvtsd2si64, "LLiV2d", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_cvttsd2si64, "LLiV2d", "ncV:128:", "sse2")
 TARGET_BUILTIN(__builtin_ia32_movnti64, "vLLi*LLi", "n", "sse2")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "LLiV2LLiIi", "nc", "sse2")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2LLiV2LLiLLiIi", "nc", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v2di, "LLiV2LLiIi", "ncV:128:", "sse2")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v2di, "V2LLiV2LLiLLiIi", "ncV:128:", "sse4.1")
 TARGET_BUILTIN(__builtin_ia32_crc32di, "ULLiULLiULLi", "nc", "sse4.2")
-TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "LLiV4LLiIi", "nc", "avx")
-TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4LLiV4LLiLLiIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_ext_v4di, "LLiV4LLiIi", "ncV:256:", "avx")
+TARGET_BUILTIN(__builtin_ia32_vec_set_v4di, "V4LLiV4LLiLLiIi", "ncV:256:", "avx")
 TARGET_BUILTIN(__builtin_ia32_rdfsbase32, "Ui", "n", "fsgsbase")
 TARGET_BUILTIN(__builtin_ia32_rdfsbase64, "ULLi", "n", "fsgsbase")
 TARGET_BUILTIN(__builtin_ia32_rdgsbase32, "Ui", "n", "fsgsbase")
@@ -86,18 +86,18 @@
 TARGET_BUILTIN(__builtin_ia32_bextri_u64, "ULLiULLiIULLi", "nc", "tbm")
 TARGET_BUILTIN(__builtin_ia32_lwpins64, "UcULLiUiUi", "n", "lwp")
 TARGET_BUILTIN(__builtin_ia32_lwpval64, "vULLiUiUi", "n", "lwp")
-TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvtss2usi64, "ULLiV4fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttsd2si64, "LLiV2dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi64, "ULLiV2dIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttss2si64, "LLiV4fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_vcvttss2usi64, "ULLiV4fIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dLLiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fLLiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dULLiIi", "nc", "avx512f")
-TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fULLiIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtsd2si64, "LLiV2dIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtsd2usi64, "ULLiV2dIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtss2si64, "LLiV4fIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvtss2usi64, "ULLiV4fIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2si64, "LLiV2dIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttsd2usi64, "ULLiV2dIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2si64, "LLiV4fIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_vcvttss2usi64, "ULLiV4fIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtsi2sd64, "V2dV2dLLiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtsi2ss64, "V4fV4fLLiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtusi2sd64, "V2dV2dULLiIi", "ncV:128:", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_cvtusi2ss64, "V4fV4fULLiIi", "ncV:128:", "avx512f")
 TARGET_BUILTIN(__builtin_ia32_directstore_u64, "vULi*ULi", "n", "movdiri")
 TARGET_BUILTIN(__builtin_ia32_ptwrite64, "vULLi", "n", "ptwrite")
 
Index: lib/Basic/Builtins.cpp
===================================================================
--- lib/Basic/Builtins.cpp
+++ lib/Basic/Builtins.cpp
@@ -107,6 +107,22 @@
   Table.get(getRecord(ID).Name).setBuiltinID(0);
 }
 
+unsigned Builtin::Context::getRequiredVectorWidth(unsigned ID) const {
+  const char *WidthPos = ::strchr(getRecord(ID).Attributes, 'V');
+  if (!WidthPos)
+    return 0;
+
+  ++WidthPos;
+  assert(*WidthPos == ':' &&
+         "Vector width specifier must be followed by a ':'");
+  ++WidthPos;
+
+  char *EndPos;
+  unsigned Width = ::strtol(WidthPos, &EndPos, 10);
+  assert(*EndPos == ':' && "Vector width specific must end with a ':'");
+  return Width;
+}
+
 bool Builtin::Context::isLike(unsigned ID, unsigned &FormatIdx,
                               bool &HasVAListArg, const char *Fmt) const {
   assert(Fmt && "Not passed a format string");
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -3654,6 +3654,9 @@
   // can move this up to the beginning of the function.
   checkTargetFeatures(E, FD);
 
+  if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
+    LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
+
   // See if we have a target specific intrinsic.
   const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
   Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
Index: lib/CodeGen/CodeGenFunction.h
===================================================================
--- lib/CodeGen/CodeGenFunction.h
+++ lib/CodeGen/CodeGenFunction.h
@@ -1463,6 +1463,10 @@
   /// Terminate funclets keyed by parent funclet pad.
   llvm::MapVector<llvm::Value *, llvm::BasicBlock *> TerminateFunclets;
 
+  /// Largest vector width used in ths function. Will be used to create a
+  /// function attribute.
+  unsigned LargestVectorWidth;
+
   /// True if we need emit the life-time markers.
   const bool ShouldEmitLifetimeMarkers;
 
Index: lib/CodeGen/CodeGenFunction.cpp
===================================================================
--- lib/CodeGen/CodeGenFunction.cpp
+++ lib/CodeGen/CodeGenFunction.cpp
@@ -81,7 +81,7 @@
       CXXStructorImplicitParamDecl(nullptr),
       CXXStructorImplicitParamValue(nullptr), OutermostConditional(nullptr),
       CurLexicalScope(nullptr), TerminateLandingPad(nullptr),
-      TerminateHandler(nullptr), TrapBB(nullptr),
+      TerminateHandler(nullptr), TrapBB(nullptr), LargestVectorWidth(0),
       ShouldEmitLifetimeMarkers(
           shouldEmitLifetimeMarkers(CGM.getCodeGenOpts(), CGM.getLangOpts())) {
   if (!suppressNewContext)
@@ -445,6 +445,11 @@
         cast<llvm::AllocaInst>(NormalCleanupDest.getPointer()), DT);
     NormalCleanupDest = Address::invalid();
   }
+
+  // Add the required-vector-width attribute.
+  if (LargestVectorWidth != 0)
+    CurFn->addFnAttr("min-legal-vector-width",
+                     llvm::utostr(LargestVectorWidth));
 }
 
 /// ShouldInstrumentFunction - Return true if the current function should be
@@ -1186,6 +1191,12 @@
   // Emit a location at the end of the prologue.
   if (CGDebugInfo *DI = getDebugInfo())
     DI->EmitLocation(Builder, StartLoc);
+
+  // TODO: Do we need to handle this in two places like we do with
+  // target-features/target-cpu?
+  if (CurFuncDecl)
+    if (const auto *VecWidth = CurFuncDecl->getAttr<MinVectorWidthAttr>())
+      LargestVectorWidth = VecWidth->getVectorWidth();
 }
 
 void CodeGenFunction::EmitFunctionBody(FunctionArgList &Args,
Index: lib/Headers/__wmmintrin_aes.h
===================================================================
--- lib/Headers/__wmmintrin_aes.h
+++ lib/Headers/__wmmintrin_aes.h
@@ -29,7 +29,7 @@
 #define __WMMINTRIN_AES_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("aes"), __min_vector_width__(128)))
 
 /// Performs a single round of AES encryption using the Equivalent
 ///    Inverse Cipher, transforming the state value from the first source
Index: lib/Headers/ammintrin.h
===================================================================
--- lib/Headers/ammintrin.h
+++ lib/Headers/ammintrin.h
@@ -27,7 +27,7 @@
 #include <pmmintrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4a"), __min_vector_width__(128)))
 
 /// Extracts the specified bits from the lower 64 bits of the 128-bit
 ///    integer vector operand at the index \a idx and of the length \a len.
Index: lib/Headers/avx2intrin.h
===================================================================
--- lib/Headers/avx2intrin.h
+++ lib/Headers/avx2intrin.h
@@ -29,98 +29,99 @@
 #define __AVX2INTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx2")))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx2"), __min_vector_width__(128)))
 
 /* SSE4 Multiple Packed Sums of Absolute Difference.  */
 #define _mm256_mpsadbw_epu8(X, Y, M) \
   (__m256i)__builtin_ia32_mpsadbw256((__v32qi)(__m256i)(X), \
                                      (__v32qi)(__m256i)(Y), (int)(M))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi8(__m256i __a)
 {
     return (__m256i)__builtin_ia32_pabsb256((__v32qi)__a);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi16(__m256i __a)
 {
     return (__m256i)__builtin_ia32_pabsw256((__v16hi)__a);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi32(__m256i __a)
 {
     return (__m256i)__builtin_ia32_pabsd256((__v8si)__a);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_packs_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_packsswb256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_packs_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_packssdw256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_packus_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_packuswb256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_packus_epi32(__m256i __V1, __m256i __V2)
 {
   return (__m256i) __builtin_ia32_packusdw256((__v8si)__V1, (__v8si)__V2);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qu)__a + (__v32qu)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a + (__v16hu)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8su)__a + (__v8su)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_add_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a + (__v4du)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_paddsb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_paddsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epu8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_paddusb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_adds_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_paddusw256((__v16hi)__a, (__v16hi)__b);
@@ -130,19 +131,19 @@
   (__m256i)__builtin_ia32_palignr256((__v32qi)(__m256i)(a), \
                                      (__v32qi)(__m256i)(b), (n))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_and_si256(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a & (__v4du)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_andnot_si256(__m256i __a, __m256i __b)
 {
   return (__m256i)(~(__v4du)__a & (__v4du)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_avg_epu8(__m256i __a, __m256i __b)
 {
   typedef unsigned short __v32hu __attribute__((__vector_size__(64)));
@@ -152,7 +153,7 @@
                  >> 1, __v32qu);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_avg_epu16(__m256i __a, __m256i __b)
 {
   typedef unsigned int __v16su __attribute__((__vector_size__(64)));
@@ -162,7 +163,7 @@
                  >> 1, __v16hu);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_blendv_epi8(__m256i __V1, __m256i __V2, __m256i __M)
 {
   return (__m256i)__builtin_ia32_pblendvb256((__v32qi)__V1, (__v32qi)__V2,
@@ -173,31 +174,31 @@
   (__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \
                                      (__v16hi)(__m256i)(V2), (int)(M))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpeq_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qi)__a == (__v32qi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpeq_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hi)__a == (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpeq_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8si)__a == (__v8si)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpeq_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4di)__a == (__v4di)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpgt_epi8(__m256i __a, __m256i __b)
 {
   /* This function always performs a signed comparison, but __v32qi is a char
@@ -205,151 +206,151 @@
   return (__m256i)((__v32qs)__a > (__v32qs)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpgt_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hi)__a > (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpgt_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8si)__a > (__v8si)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmpgt_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4di)__a > (__v4di)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_hadd_epi16(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_phaddw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_hadd_epi32(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_phaddd256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_hadds_epi16(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_phaddsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_hsub_epi16(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_phsubw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_hsub_epi32(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_phsubd256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_hsubs_epi16(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_phsubsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maddubs_epi16(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_pmaddubsw256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_madd_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaddwd256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxsb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxsd256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxub256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxuw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmaxud256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminsb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminsd256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminub256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminuw256 ((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pminud256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS256
 _mm256_movemask_epi8(__m256i __a)
 {
   return __builtin_ia32_pmovmskb256((__v32qi)__a);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi8_epi16(__m128i __V)
 {
   /* This function always performs a signed extension, but __v16qi is a char
@@ -357,7 +358,7 @@
   return (__m256i)__builtin_convertvector((__v16qs)__V, __v16hi);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi8_epi32(__m128i __V)
 {
   /* This function always performs a signed extension, but __v16qi is a char
@@ -365,7 +366,7 @@
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi8_epi64(__m128i __V)
 {
   /* This function always performs a signed extension, but __v16qi is a char
@@ -373,115 +374,115 @@
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__V, (__v16qs)__V, 0, 1, 2, 3), __v4di);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi16_epi32(__m128i __V)
 {
   return (__m256i)__builtin_convertvector((__v8hi)__V, __v8si);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi16_epi64(__m128i __V)
 {
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hi)__V, (__v8hi)__V, 0, 1, 2, 3), __v4di);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi32_epi64(__m128i __V)
 {
   return (__m256i)__builtin_convertvector((__v4si)__V, __v4di);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepu8_epi16(__m128i __V)
 {
   return (__m256i)__builtin_convertvector((__v16qu)__V, __v16hi);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepu8_epi32(__m128i __V)
 {
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3, 4, 5, 6, 7), __v8si);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepu8_epi64(__m128i __V)
 {
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__V, (__v16qu)__V, 0, 1, 2, 3), __v4di);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepu16_epi32(__m128i __V)
 {
   return (__m256i)__builtin_convertvector((__v8hu)__V, __v8si);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepu16_epi64(__m128i __V)
 {
   return (__m256i)__builtin_convertvector(__builtin_shufflevector((__v8hu)__V, (__v8hu)__V, 0, 1, 2, 3), __v4di);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtepu32_epi64(__m128i __V)
 {
   return (__m256i)__builtin_convertvector((__v4su)__V, __v4di);
 }
 
-static __inline__  __m256i __DEFAULT_FN_ATTRS
+static __inline__  __m256i __DEFAULT_FN_ATTRS256
 _mm256_mul_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmuldq256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mulhrs_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmulhrsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mulhi_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmulhuw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mulhi_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pmulhw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mullo_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a * (__v16hu)__b);
 }
 
-static __inline__  __m256i __DEFAULT_FN_ATTRS
+static __inline__  __m256i __DEFAULT_FN_ATTRS256
 _mm256_mullo_epi32 (__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8su)__a * (__v8su)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mul_epu32(__m256i __a, __m256i __b)
 {
   return __builtin_ia32_pmuludq256((__v8si)__a, (__v8si)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_or_si256(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a | (__v4du)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sad_epu8(__m256i __a, __m256i __b)
 {
   return __builtin_ia32_psadbw256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shuffle_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_pshufb256((__v32qi)__a, (__v32qi)__b);
@@ -496,19 +497,19 @@
 #define _mm256_shufflelo_epi16(a, imm) \
   (__m256i)__builtin_ia32_pshuflw256((__v16hi)(__m256i)(a), (int)(imm))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sign_epi8(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_psignb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sign_epi16(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_psignw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sign_epi32(__m256i __a, __m256i __b)
 {
     return (__m256i)__builtin_ia32_psignd256((__v8si)__a, (__v8si)__b);
@@ -520,61 +521,61 @@
 #define _mm256_bslli_epi128(a, imm) \
   (__m256i)__builtin_ia32_pslldqi256_byteshift((__v4di)(__m256i)(a), (int)(imm))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_slli_epi16(__m256i __a, int __count)
 {
   return (__m256i)__builtin_ia32_psllwi256((__v16hi)__a, __count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sll_epi16(__m256i __a, __m128i __count)
 {
   return (__m256i)__builtin_ia32_psllw256((__v16hi)__a, (__v8hi)__count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_slli_epi32(__m256i __a, int __count)
 {
   return (__m256i)__builtin_ia32_pslldi256((__v8si)__a, __count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sll_epi32(__m256i __a, __m128i __count)
 {
   return (__m256i)__builtin_ia32_pslld256((__v8si)__a, (__v4si)__count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_slli_epi64(__m256i __a, int __count)
 {
   return __builtin_ia32_psllqi256((__v4di)__a, __count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sll_epi64(__m256i __a, __m128i __count)
 {
   return __builtin_ia32_psllq256((__v4di)__a, __count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srai_epi16(__m256i __a, int __count)
 {
   return (__m256i)__builtin_ia32_psrawi256((__v16hi)__a, __count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sra_epi16(__m256i __a, __m128i __count)
 {
   return (__m256i)__builtin_ia32_psraw256((__v16hi)__a, (__v8hi)__count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srai_epi32(__m256i __a, int __count)
 {
   return (__m256i)__builtin_ia32_psradi256((__v8si)__a, __count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sra_epi32(__m256i __a, __m128i __count)
 {
   return (__m256i)__builtin_ia32_psrad256((__v8si)__a, (__v4si)__count);
@@ -586,176 +587,176 @@
 #define _mm256_bsrli_epi128(a, imm) \
   (__m256i)__builtin_ia32_psrldqi256_byteshift((__m256i)(a), (int)(imm))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srli_epi16(__m256i __a, int __count)
 {
   return (__m256i)__builtin_ia32_psrlwi256((__v16hi)__a, __count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srl_epi16(__m256i __a, __m128i __count)
 {
   return (__m256i)__builtin_ia32_psrlw256((__v16hi)__a, (__v8hi)__count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srli_epi32(__m256i __a, int __count)
 {
   return (__m256i)__builtin_ia32_psrldi256((__v8si)__a, __count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srl_epi32(__m256i __a, __m128i __count)
 {
   return (__m256i)__builtin_ia32_psrld256((__v8si)__a, (__v4si)__count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srli_epi64(__m256i __a, int __count)
 {
   return __builtin_ia32_psrlqi256((__v4di)__a, __count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srl_epi64(__m256i __a, __m128i __count)
 {
   return __builtin_ia32_psrlq256((__v4di)__a, __count);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sub_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v32qu)__a - (__v32qu)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sub_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v16hu)__a - (__v16hu)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sub_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v8su)__a - (__v8su)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sub_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a - (__v4du)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_subs_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_psubsb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_subs_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_psubsw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_subs_epu8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_psubusb256((__v32qi)__a, (__v32qi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_subs_epu16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_psubusw256((__v16hi)__a, (__v16hi)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_unpackhi_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 8, 32+8, 9, 32+9, 10, 32+10, 11, 32+11, 12, 32+12, 13, 32+13, 14, 32+14, 15, 32+15, 24, 32+24, 25, 32+25, 26, 32+26, 27, 32+27, 28, 32+28, 29, 32+29, 30, 32+30, 31, 32+31);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_unpackhi_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 4, 16+4, 5, 16+5, 6, 16+6, 7, 16+7, 12, 16+12, 13, 16+13, 14, 16+14, 15, 16+15);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_unpackhi_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 2, 8+2, 3, 8+3, 6, 8+6, 7, 8+7);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_unpackhi_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 1, 4+1, 3, 4+3);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_unpacklo_epi8(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v32qi)__a, (__v32qi)__b, 0, 32+0, 1, 32+1, 2, 32+2, 3, 32+3, 4, 32+4, 5, 32+5, 6, 32+6, 7, 32+7, 16, 32+16, 17, 32+17, 18, 32+18, 19, 32+19, 20, 32+20, 21, 32+21, 22, 32+22, 23, 32+23);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_unpacklo_epi16(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v16hi)__a, (__v16hi)__b, 0, 16+0, 1, 16+1, 2, 16+2, 3, 16+3, 8, 16+8, 9, 16+9, 10, 16+10, 11, 16+11);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_unpacklo_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v8si)__a, (__v8si)__b, 0, 8+0, 1, 8+1, 4, 8+4, 5, 8+5);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_unpacklo_epi64(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_shufflevector((__v4di)__a, (__v4di)__b, 0, 4+0, 2, 4+2);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_xor_si256(__m256i __a, __m256i __b)
 {
   return (__m256i)((__v4du)__a ^ (__v4du)__b);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_stream_load_si256(__m256i const *__V)
 {
   typedef __v4di __v4di_aligned __attribute__((aligned(32)));
   return (__m256i)__builtin_nontemporal_load((const __v4di_aligned *)__V);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_broadcastss_ps(__m128 __X)
 {
   return (__m128)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_broadcastsd_pd(__m128d __a)
 {
   return __builtin_shufflevector((__v2df)__a, (__v2df)__a, 0, 0);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_broadcastss_ps(__m128 __X)
 {
   return (__m256)__builtin_shufflevector((__v4sf)__X, (__v4sf)__X, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_broadcastsd_pd(__m128d __X)
 {
   return (__m256d)__builtin_shufflevector((__v2df)__X, (__v2df)__X, 0, 0, 0, 0);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_broadcastsi128_si256(__m128i __X)
 {
   return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 1, 0, 1);
@@ -769,56 +770,56 @@
   (__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \
                                      (__v8si)(__m256i)(V2), (int)(M))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_broadcastb_epi8(__m128i __X)
 {
   return (__m256i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_broadcastw_epi16(__m128i __X)
 {
   return (__m256i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_broadcastd_epi32(__m128i __X)
 {
   return (__m256i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_broadcastq_epi64(__m128i __X)
 {
   return (__m256i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0, 0, 0);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_broadcastb_epi8(__m128i __X)
 {
   return (__m128i)__builtin_shufflevector((__v16qi)__X, (__v16qi)__X, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_broadcastw_epi16(__m128i __X)
 {
   return (__m128i)__builtin_shufflevector((__v8hi)__X, (__v8hi)__X, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_broadcastd_epi32(__m128i __X)
 {
   return (__m128i)__builtin_shufflevector((__v4si)__X, (__v4si)__X, 0, 0, 0, 0);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_broadcastq_epi64(__m128i __X)
 {
   return (__m128i)__builtin_shufflevector((__v2di)__X, (__v2di)__X, 0, 0);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_permutevar8x32_epi32(__m256i __a, __m256i __b)
 {
   return (__m256i)__builtin_ia32_permvarsi256((__v8si)__a, (__v8si)__b);
@@ -827,7 +828,7 @@
 #define _mm256_permute4x64_pd(V, M) \
   (__m256d)__builtin_ia32_permdf256((__v4df)(__m256d)(V), (int)(M))
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_permutevar8x32_ps(__m256 __a, __m256i __b)
 {
   return (__m256)__builtin_ia32_permvarsf256((__v8sf)__a, (__v8si)__b);
@@ -846,109 +847,109 @@
   (__m256i)__builtin_ia32_insert128i256((__v4di)(__m256i)(V1), \
                                         (__v2di)(__m128i)(V2), (int)(M))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskload_epi32(int const *__X, __m256i __M)
 {
   return (__m256i)__builtin_ia32_maskloadd256((const __v8si *)__X, (__v8si)__M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskload_epi64(long long const *__X, __m256i __M)
 {
   return (__m256i)__builtin_ia32_maskloadq256((const __v4di *)__X, (__v4di)__M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskload_epi32(int const *__X, __m128i __M)
 {
   return (__m128i)__builtin_ia32_maskloadd((const __v4si *)__X, (__v4si)__M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskload_epi64(long long const *__X, __m128i __M)
 {
   return (__m128i)__builtin_ia32_maskloadq((const __v2di *)__X, (__v2di)__M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_maskstore_epi32(int *__X, __m256i __M, __m256i __Y)
 {
   __builtin_ia32_maskstored256((__v8si *)__X, (__v8si)__M, (__v8si)__Y);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_maskstore_epi64(long long *__X, __m256i __M, __m256i __Y)
 {
   __builtin_ia32_maskstoreq256((__v4di *)__X, (__v4di)__M, (__v4di)__Y);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_maskstore_epi32(int *__X, __m128i __M, __m128i __Y)
 {
   __builtin_ia32_maskstored((__v4si *)__X, (__v4si)__M, (__v4si)__Y);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_maskstore_epi64(long long *__X, __m128i __M, __m128i __Y)
 {
   __builtin_ia32_maskstoreq(( __v2di *)__X, (__v2di)__M, (__v2di)__Y);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sllv_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psllv8si((__v8si)__X, (__v8si)__Y);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_sllv_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psllv4si((__v4si)__X, (__v4si)__Y);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sllv_epi64(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psllv4di((__v4di)__X, (__v4di)__Y);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_sllv_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psllv2di((__v2di)__X, (__v2di)__Y);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srav_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrav8si((__v8si)__X, (__v8si)__Y);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_srav_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrav4si((__v4si)__X, (__v4si)__Y);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srlv_epi32(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrlv8si((__v8si)__X, (__v8si)__Y);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_srlv_epi32(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrlv4si((__v4si)__X, (__v4si)__Y);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srlv_epi64(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psrlv4di((__v4di)__X, (__v4di)__Y);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_srlv_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psrlv2di((__v2di)__X, (__v2di)__Y);
@@ -1161,6 +1162,7 @@
                                        (__v4di)(__m256i)(i), \
                                        (__v4di)_mm256_set1_epi64x(-1), (s))
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS256
+#undef __DEFAULT_FN_ATTRS128
 
 #endif /* __AVX2INTRIN_H */
Index: lib/Headers/avx512bitalgintrin.h
===================================================================
--- lib/Headers/avx512bitalgintrin.h
+++ lib/Headers/avx512bitalgintrin.h
@@ -29,7 +29,7 @@
 #define __AVX512BITALGINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"), __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_popcnt_epi16(__m512i __A)
Index: lib/Headers/avx512bwintrin.h
===================================================================
--- lib/Headers/avx512bwintrin.h
+++ lib/Headers/avx512bwintrin.h
@@ -32,7 +32,7 @@
 typedef unsigned long long __mmask64;
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bw"), __min_vector_width__(512)))
 
 /* Integer compare */
 
Index: lib/Headers/avx512cdintrin.h
===================================================================
--- lib/Headers/avx512cdintrin.h
+++ lib/Headers/avx512cdintrin.h
@@ -29,7 +29,7 @@
 #define __AVX512CDINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512cd"), __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_conflict_epi64 (__m512i __A)
Index: lib/Headers/avx512dqintrin.h
===================================================================
--- lib/Headers/avx512dqintrin.h
+++ lib/Headers/avx512dqintrin.h
@@ -29,7 +29,7 @@
 #define __AVX512DQINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512dq"), __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_mullo_epi64 (__m512i __A, __m512i __B) {
Index: lib/Headers/avx512fintrin.h
===================================================================
--- lib/Headers/avx512fintrin.h
+++ lib/Headers/avx512fintrin.h
@@ -173,11 +173,12 @@
 } _MM_MANTISSA_SIGN_ENUM;
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512f")))
+#define __DEFAULT_FN_ATTRS512 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(512)))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512f"), __min_vector_width__(128)))
 
 /* Create vectors with repeated elements */
 
-static  __inline __m512i __DEFAULT_FN_ATTRS
+static  __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_setzero_si512(void)
 {
   return __extension__ (__m512i)(__v8di){ 0, 0, 0, 0, 0, 0, 0, 0 };
@@ -185,38 +186,38 @@
 
 #define _mm512_setzero_epi32 _mm512_setzero_si512
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_undefined_pd(void)
 {
   return (__m512d)__builtin_ia32_undef512();
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_undefined(void)
 {
   return (__m512)__builtin_ia32_undef512();
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_undefined_ps(void)
 {
   return (__m512)__builtin_ia32_undef512();
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_undefined_epi32(void)
 {
   return (__m512i)__builtin_ia32_undef512();
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_broadcastd_epi32 (__m128i __A)
 {
   return (__m512i)__builtin_shufflevector((__v4si) __A, (__v4si) __A,
                                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcastd_epi32 (__m512i __O, __mmask16 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512(__M,
@@ -224,7 +225,7 @@
                                              (__v16si) __O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcastd_epi32 (__mmask16 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512(__M,
@@ -232,14 +233,14 @@
                                              (__v16si) _mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_broadcastq_epi64 (__m128i __A)
 {
   return (__m512i)__builtin_shufflevector((__v2di) __A, (__v2di) __A,
                                           0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcastq_epi64 (__m512i __O, __mmask8 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512(__M,
@@ -248,7 +249,7 @@
 
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512(__M,
@@ -257,7 +258,7 @@
 }
 
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_setzero_ps(void)
 {
   return __extension__ (__m512){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,
@@ -266,26 +267,26 @@
 
 #define _mm512_setzero _mm512_setzero_ps
 
-static  __inline __m512d __DEFAULT_FN_ATTRS
+static  __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_setzero_pd(void)
 {
   return __extension__ (__m512d){ 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 };
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_set1_ps(float __w)
 {
   return __extension__ (__m512){ __w, __w, __w, __w, __w, __w, __w, __w,
                                  __w, __w, __w, __w, __w, __w, __w, __w  };
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_set1_pd(double __w)
 {
   return __extension__ (__m512d){ __w, __w, __w, __w, __w, __w, __w, __w };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_set1_epi8(char __w)
 {
   return __extension__ (__m512i)(__v64qi){
@@ -299,7 +300,7 @@
     __w, __w, __w, __w, __w, __w, __w, __w  };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_set1_epi16(short __w)
 {
   return __extension__ (__m512i)(__v32hi){
@@ -309,7 +310,7 @@
     __w, __w, __w, __w, __w, __w, __w, __w };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_set1_epi32(int __s)
 {
   return __extension__ (__m512i)(__v16si){
@@ -317,7 +318,7 @@
     __s, __s, __s, __s, __s, __s, __s, __s };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_set1_epi32(__mmask16 __M, int __A) 
 {
   return (__m512i)__builtin_ia32_selectd_512(__M, 
@@ -325,13 +326,13 @@
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_set1_epi64(long long __d)
 {
   return __extension__(__m512i)(__v8di){ __d, __d, __d, __d, __d, __d, __d, __d };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_set1_epi64(__mmask8 __M, long long __A)
 {
   return (__m512i)__builtin_ia32_selectq_512(__M,
@@ -339,14 +340,14 @@
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_broadcastss_ps(__m128 __A)
 {
   return (__m512)__builtin_shufflevector((__v4sf) __A, (__v4sf) __A,
                                          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_set4_epi32 (int __A, int __B, int __C, int __D)
 {
   return __extension__ (__m512i)(__v16si)
@@ -354,7 +355,7 @@
      __D, __C, __B, __A, __D, __C, __B, __A };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_set4_epi64 (long long __A, long long __B, long long __C,
        long long __D)
 {
@@ -362,14 +363,14 @@
    { __D, __C, __B, __A, __D, __C, __B, __A };
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_set4_pd (double __A, double __B, double __C, double __D)
 {
   return __extension__ (__m512d)
    { __D, __C, __B, __A, __D, __C, __B, __A };
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_set4_ps (float __A, float __B, float __C, float __D)
 {
   return __extension__ (__m512)
@@ -389,7 +390,7 @@
 #define _mm512_setr4_ps(e0,e1,e2,e3)                \
   _mm512_set4_ps((e3),(e2),(e1),(e0))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_broadcastsd_pd(__m128d __A)
 {
   return (__m512d)__builtin_shufflevector((__v2df) __A, (__v2df) __A,
@@ -398,122 +399,122 @@
 
 /* Cast between vector types */
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_castpd256_pd512(__m256d __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3, -1, -1, -1, -1);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_castps256_ps512(__m256 __a)
 {
   return __builtin_shufflevector(__a, __a, 0,  1,  2,  3,  4,  5,  6,  7,
                                           -1, -1, -1, -1, -1, -1, -1, -1);
 }
 
-static __inline __m128d __DEFAULT_FN_ATTRS
+static __inline __m128d __DEFAULT_FN_ATTRS512
 _mm512_castpd512_pd128(__m512d __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1);
 }
 
-static __inline __m256d __DEFAULT_FN_ATTRS
+static __inline __m256d __DEFAULT_FN_ATTRS512
 _mm512_castpd512_pd256 (__m512d __A)
 {
   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3);
 }
 
-static __inline __m128 __DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS512
 _mm512_castps512_ps128(__m512 __a)
 {
   return __builtin_shufflevector(__a, __a, 0, 1, 2, 3);
 }
 
-static __inline __m256 __DEFAULT_FN_ATTRS
+static __inline __m256 __DEFAULT_FN_ATTRS512
 _mm512_castps512_ps256 (__m512 __A)
 {
   return __builtin_shufflevector(__A, __A, 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_castpd_ps (__m512d __A)
 {
   return (__m512) (__A);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_castpd_si512 (__m512d __A)
 {
   return (__m512i) (__A);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_castpd128_pd512 (__m128d __A)
 {
   return __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_castps_pd (__m512 __A)
 {
   return (__m512d) (__A);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_castps_si512 (__m512 __A)
 {
   return (__m512i) (__A);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_castps128_ps512 (__m128 __A)
 {
     return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_castsi128_si512 (__m128i __A)
 {
    return  __builtin_shufflevector( __A, __A, 0, 1, -1, -1, -1, -1, -1, -1);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_castsi256_si512 (__m256i __A)
 {
    return  __builtin_shufflevector( __A, __A, 0, 1, 2, 3, -1, -1, -1, -1);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_castsi512_ps (__m512i __A)
 {
   return (__m512) (__A);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_castsi512_pd (__m512i __A)
 {
   return (__m512d) (__A);
 }
 
-static __inline __m128i __DEFAULT_FN_ATTRS
+static __inline __m128i __DEFAULT_FN_ATTRS512
 _mm512_castsi512_si128 (__m512i __A)
 {
   return (__m128i)__builtin_shufflevector(__A, __A , 0, 1);
 }
 
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS512
 _mm512_castsi512_si256 (__m512i __A)
 {
   return (__m256i)__builtin_shufflevector(__A, __A , 0, 1, 2, 3);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_int2mask(int __a)
 {
   return (__mmask16)__a;
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_mask2int(__mmask16 __a)
 {
   return (int)__a;
@@ -532,7 +533,7 @@
 ///    A 128-bit vector of [2 x double].
 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 128 bits
 ///    contain the value of the parameter. The upper 384 bits are set to zero.
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_zextpd128_pd512(__m128d __a)
 {
   return __builtin_shufflevector((__v2df)__a, (__v2df)_mm_setzero_pd(), 0, 1, 2, 3, 2, 3, 2, 3);
@@ -551,7 +552,7 @@
 ///    A 256-bit vector of [4 x double].
 /// \returns A 512-bit floating-point vector of [8 x double]. The lower 256 bits
 ///    contain the value of the parameter. The upper 256 bits are set to zero.
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_zextpd256_pd512(__m256d __a)
 {
   return __builtin_shufflevector((__v4df)__a, (__v4df)_mm256_setzero_pd(), 0, 1, 2, 3, 4, 5, 6, 7);
@@ -569,7 +570,7 @@
 ///    A 128-bit vector of [4 x float].
 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 128 bits
 ///    contain the value of the parameter. The upper 384 bits are set to zero.
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_zextps128_ps512(__m128 __a)
 {
   return __builtin_shufflevector((__v4sf)__a, (__v4sf)_mm_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 4, 5, 6, 7, 4, 5, 6, 7);
@@ -587,7 +588,7 @@
 ///    A 256-bit vector of [8 x float].
 /// \returns A 512-bit floating-point vector of [16 x float]. The lower 256 bits
 ///    contain the value of the parameter. The upper 256 bits are set to zero.
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_zextps256_ps512(__m256 __a)
 {
   return __builtin_shufflevector((__v8sf)__a, (__v8sf)_mm256_setzero_ps(), 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -605,7 +606,7 @@
 ///    A 128-bit integer vector.
 /// \returns A 512-bit integer vector. The lower 128 bits contain the value of
 ///    the parameter. The upper 384 bits are set to zero.
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_zextsi128_si512(__m128i __a)
 {
   return __builtin_shufflevector((__v2di)__a, (__v2di)_mm_setzero_si128(), 0, 1, 2, 3, 2, 3, 2, 3);
@@ -623,20 +624,20 @@
 ///    A 256-bit integer vector.
 /// \returns A 512-bit integer vector. The lower 256 bits contain the value of
 ///    the parameter. The upper 256 bits are set to zero.
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_zextsi256_si512(__m256i __a)
 {
   return __builtin_shufflevector((__v4di)__a, (__v4di)_mm256_setzero_si256(), 0, 1, 2, 3, 4, 5, 6, 7);
 }
 
 /* Bitwise operators */
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_and_epi32(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v16su)__a & (__v16su)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_and_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
@@ -644,20 +645,20 @@
                 (__v16si) __src);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_and_epi32(__mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) _mm512_mask_and_epi32(_mm512_setzero_si512 (),
                                          __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_and_epi64(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a & (__v8du)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_and_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
 {
     return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __k,
@@ -665,26 +666,26 @@
                 (__v8di) __src);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_and_epi64(__mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i) _mm512_mask_and_epi64(_mm512_setzero_si512 (),
                                          __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_andnot_si512 (__m512i __A, __m512i __B)
 {
   return (__m512i)(~(__v8du)__A & (__v8du)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_andnot_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i)(~(__v16su)__A & (__v16su)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_andnot_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -692,20 +693,20 @@
                                          (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_andnot_epi32(__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)_mm512_mask_andnot_epi32(_mm512_setzero_si512(),
                                            __U, __A, __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_andnot_epi64(__m512i __A, __m512i __B)
 {
   return (__m512i)(~(__v8du)__A & (__v8du)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_andnot_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -713,20 +714,20 @@
                                           (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_andnot_epi64(__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)_mm512_mask_andnot_epi64(_mm512_setzero_si512(),
                                            __U, __A, __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_or_epi32(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v16su)__a | (__v16su)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_or_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
@@ -734,19 +735,19 @@
                                              (__v16si)__src);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_or_epi32(__mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)_mm512_mask_or_epi32(_mm512_setzero_si512(), __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_or_epi64(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a | (__v8du)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_or_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
@@ -754,19 +755,19 @@
                                              (__v8di)__src);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_or_epi64(__mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)_mm512_mask_or_epi64(_mm512_setzero_si512(), __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_xor_epi32(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v16su)__a ^ (__v16su)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_xor_epi32(__m512i __src, __mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__k,
@@ -774,19 +775,19 @@
                                             (__v16si)__src);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_xor_epi32(__mmask16 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)_mm512_mask_xor_epi32(_mm512_setzero_si512(), __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_xor_epi64(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a ^ (__v8du)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_xor_epi64(__m512i __src, __mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__k,
@@ -794,25 +795,25 @@
                                              (__v8di)__src);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_xor_epi64(__mmask8 __k, __m512i __a, __m512i __b)
 {
   return (__m512i)_mm512_mask_xor_epi64(_mm512_setzero_si512(), __k, __a, __b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_and_si512(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a & (__v8du)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_or_si512(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a | (__v8du)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_xor_si512(__m512i __a, __m512i __b)
 {
   return (__m512i)((__v8du)__a ^ (__v8du)__b);
@@ -820,49 +821,49 @@
 
 /* Arithmetic */
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_add_pd(__m512d __a, __m512d __b)
 {
   return (__m512d)((__v8df)__a + (__v8df)__b);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_add_ps(__m512 __a, __m512 __b)
 {
   return (__m512)((__v16sf)__a + (__v16sf)__b);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_mul_pd(__m512d __a, __m512d __b)
 {
   return (__m512d)((__v8df)__a * (__v8df)__b);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_mul_ps(__m512 __a, __m512 __b)
 {
   return (__m512)((__v16sf)__a * (__v16sf)__b);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_sub_pd(__m512d __a, __m512d __b)
 {
   return (__m512d)((__v8df)__a - (__v8df)__b);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_sub_ps(__m512 __a, __m512 __b)
 {
   return (__m512)((__v16sf)__a - (__v16sf)__b);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_add_epi64 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v8du) __A + (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_add_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -870,7 +871,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_add_epi64(__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -878,13 +879,13 @@
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_sub_epi64 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v8du) __A - (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_sub_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -892,7 +893,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_sub_epi64(__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -900,13 +901,13 @@
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_add_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v16su) __A + (__v16su) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_add_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -914,7 +915,7 @@
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_add_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -922,13 +923,13 @@
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_sub_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v16su) __A - (__v16su) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_sub_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -936,7 +937,7 @@
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_sub_epi32(__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -958,14 +959,14 @@
                                    (__v8df)_mm512_max_round_pd((A), (B), (R)), \
                                    (__v8df)_mm512_setzero_pd())
 
-static  __inline__ __m512d __DEFAULT_FN_ATTRS
+static  __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_max_pd(__m512d __A, __m512d __B)
 {
   return (__m512d) __builtin_ia32_maxpd512((__v8df) __A, (__v8df) __B,
                                            _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_max_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
 {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
@@ -973,7 +974,7 @@
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_max_pd (__mmask8 __U, __m512d __A, __m512d __B)
 {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
@@ -995,14 +996,14 @@
                                   (__v16sf)_mm512_max_round_ps((A), (B), (R)), \
                                   (__v16sf)_mm512_setzero_ps())
 
-static  __inline__ __m512 __DEFAULT_FN_ATTRS
+static  __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_max_ps(__m512 __A, __m512 __B)
 {
   return (__m512) __builtin_ia32_maxps512((__v16sf) __A, (__v16sf) __B,
                                           _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_max_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
 {
   return (__m512)__builtin_ia32_selectps_512(__U,
@@ -1010,7 +1011,7 @@
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_max_ps (__mmask16 __U, __m512 __A, __m512 __B)
 {
   return (__m512)__builtin_ia32_selectps_512(__U,
@@ -1018,7 +1019,7 @@
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_max_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
@@ -1027,7 +1028,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_max_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_maxss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
@@ -1054,7 +1055,7 @@
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_max_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
@@ -1063,7 +1064,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_max_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_maxsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
@@ -1091,13 +1092,13 @@
                                            (__mmask8)(U), (int)(R))
 
 static __inline __m512i
-__DEFAULT_FN_ATTRS
+__DEFAULT_FN_ATTRS512
 _mm512_max_epi32(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_pmaxsd512((__v16si)__A, (__v16si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_max_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1105,7 +1106,7 @@
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_max_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1113,13 +1114,13 @@
                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu32(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_pmaxud512((__v16si)__A, (__v16si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_max_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1127,7 +1128,7 @@
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_max_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1135,13 +1136,13 @@
                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epi64(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_pmaxsq512((__v8di)__A, (__v8di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_max_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1149,7 +1150,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_max_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1157,13 +1158,13 @@
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_max_epu64(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_pmaxuq512((__v8di)__A, (__v8di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_max_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1171,7 +1172,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_max_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1193,14 +1194,14 @@
                                    (__v8df)_mm512_min_round_pd((A), (B), (R)), \
                                    (__v8df)_mm512_setzero_pd())
 
-static  __inline__ __m512d __DEFAULT_FN_ATTRS
+static  __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_min_pd(__m512d __A, __m512d __B)
 {
   return (__m512d) __builtin_ia32_minpd512((__v8df) __A, (__v8df) __B,
                                            _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_min_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
 {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
@@ -1208,7 +1209,7 @@
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_min_pd (__mmask8 __U, __m512d __A, __m512d __B)
 {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
@@ -1230,14 +1231,14 @@
                                   (__v16sf)_mm512_min_round_ps((A), (B), (R)), \
                                   (__v16sf)_mm512_setzero_ps())
 
-static  __inline__ __m512 __DEFAULT_FN_ATTRS
+static  __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_min_ps(__m512 __A, __m512 __B)
 {
   return (__m512) __builtin_ia32_minps512((__v16sf) __A, (__v16sf) __B,
                                           _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_min_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
 {
   return (__m512)__builtin_ia32_selectps_512(__U,
@@ -1245,7 +1246,7 @@
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_min_ps (__mmask16 __U, __m512 __A, __m512 __B)
 {
   return (__m512)__builtin_ia32_selectps_512(__U,
@@ -1253,7 +1254,7 @@
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_min_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
@@ -1262,7 +1263,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_min_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_minss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
@@ -1289,7 +1290,7 @@
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_min_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
@@ -1298,7 +1299,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_min_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_minsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
@@ -1326,13 +1327,13 @@
                                            (__mmask8)(U), (int)(R))
 
 static __inline __m512i
-__DEFAULT_FN_ATTRS
+__DEFAULT_FN_ATTRS512
 _mm512_min_epi32(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_pminsd512((__v16si)__A, (__v16si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_min_epi32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1340,7 +1341,7 @@
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_min_epi32 (__mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1348,13 +1349,13 @@
                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu32(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_pminud512((__v16si)__A, (__v16si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_min_epu32 (__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1362,7 +1363,7 @@
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_min_epu32 (__mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1370,13 +1371,13 @@
                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epi64(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_pminsq512((__v8di)__A, (__v8di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_min_epi64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1384,7 +1385,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_min_epi64 (__mmask8 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1392,13 +1393,13 @@
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_min_epu64(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_pminuq512((__v8di)__A, (__v8di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_min_epu64 (__m512i __W, __mmask8 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1406,7 +1407,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_min_epu64 (__mmask8 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1414,13 +1415,13 @@
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mul_epi32(__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_pmuldq512((__v16si)__X, (__v16si) __Y);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_mul_epi32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1428,7 +1429,7 @@
                                              (__v8di)__W);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_mul_epi32(__mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1436,13 +1437,13 @@
                                              (__v8di)_mm512_setzero_si512 ());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mul_epu32(__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_pmuludq512((__v16si)__X, (__v16si)__Y);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_mul_epu32(__m512i __W, __mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1450,7 +1451,7 @@
                                              (__v8di)__W);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_mul_epu32(__mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -1458,13 +1459,13 @@
                                              (__v8di)_mm512_setzero_si512 ());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mullo_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i) ((__v16su) __A * (__v16su) __B);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_mullo_epi32(__mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1472,7 +1473,7 @@
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_mullo_epi32(__m512i __W, __mmask16 __M, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -1480,12 +1481,12 @@
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mullox_epi64 (__m512i __A, __m512i __B) {
   return (__m512i) ((__v8du) __A * (__v8du) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_mullox_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B) {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
                                              (__v8di)_mm512_mullox_epi64(__A, __B),
@@ -1505,14 +1506,14 @@
                                        (__v8df)_mm512_sqrt_round_pd((A), (R)), \
                                        (__v8df)_mm512_setzero_pd())
 
-static  __inline__ __m512d __DEFAULT_FN_ATTRS
+static  __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_sqrt_pd(__m512d __A)
 {
   return (__m512d)__builtin_ia32_sqrtpd512((__v8df)__A,
                                            _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_sqrt_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
@@ -1520,7 +1521,7 @@
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_sqrt_pd (__mmask8 __U, __m512d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
@@ -1541,14 +1542,14 @@
                                       (__v16sf)_mm512_sqrt_round_ps((A), (R)), \
                                       (__v16sf)_mm512_setzero_ps())
 
-static  __inline__ __m512 __DEFAULT_FN_ATTRS
+static  __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_sqrt_ps(__m512 __A)
 {
   return (__m512)__builtin_ia32_sqrtps512((__v16sf)__A,
                                           _MM_FROUND_CUR_DIRECTION);
 }
 
-static  __inline__ __m512 __DEFAULT_FN_ATTRS
+static  __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_sqrt_ps(__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512)__builtin_ia32_selectps_512(__U,
@@ -1556,7 +1557,7 @@
                                              (__v16sf)__W);
 }
 
-static  __inline__ __m512 __DEFAULT_FN_ATTRS
+static  __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_sqrt_ps( __mmask16 __U, __m512 __A)
 {
   return (__m512)__builtin_ia32_selectps_512(__U,
@@ -1564,7 +1565,7 @@
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static  __inline__ __m512d __DEFAULT_FN_ATTRS
+static  __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_rsqrt14_pd(__m512d __A)
 {
   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
@@ -1572,7 +1573,7 @@
                  _mm512_setzero_pd (),
                  (__mmask8) -1);}
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_rsqrt14_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
@@ -1580,7 +1581,7 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_rsqrt14_pd (__mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_rsqrt14pd512_mask ((__v8df) __A,
@@ -1589,7 +1590,7 @@
                   (__mmask8) __U);
 }
 
-static  __inline__ __m512 __DEFAULT_FN_ATTRS
+static  __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_rsqrt14_ps(__m512 __A)
 {
   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
@@ -1598,7 +1599,7 @@
                 (__mmask16) -1);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_rsqrt14_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
@@ -1606,7 +1607,7 @@
                  (__mmask16) __U);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_rsqrt14_ps (__mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_rsqrt14ps512_mask ((__v16sf) __A,
@@ -1615,7 +1616,7 @@
                  (__mmask16) __U);
 }
 
-static  __inline__ __m128 __DEFAULT_FN_ATTRS
+static  __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_rsqrt14_ss(__m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
@@ -1625,7 +1626,7 @@
              (__mmask8) -1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_rsqrt14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
@@ -1634,7 +1635,7 @@
           (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_rsqrt14_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
  return (__m128) __builtin_ia32_rsqrt14ss_mask ((__v4sf) __A,
@@ -1643,7 +1644,7 @@
           (__mmask8) __U);
 }
 
-static  __inline__ __m128d __DEFAULT_FN_ATTRS
+static  __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_rsqrt14_sd(__m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_rsqrt14sd_mask ((__v2df) __A,
@@ -1653,7 +1654,7 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_rsqrt14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
@@ -1662,7 +1663,7 @@
           (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_rsqrt14_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
  return (__m128d) __builtin_ia32_rsqrt14sd_mask ( (__v2df) __A,
@@ -1671,7 +1672,7 @@
           (__mmask8) __U);
 }
 
-static  __inline__ __m512d __DEFAULT_FN_ATTRS
+static  __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_rcp14_pd(__m512d __A)
 {
   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
@@ -1680,7 +1681,7 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_rcp14_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
@@ -1688,7 +1689,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_rcp14_pd (__mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_rcp14pd512_mask ((__v8df) __A,
@@ -1697,7 +1698,7 @@
                 (__mmask8) __U);
 }
 
-static  __inline__ __m512 __DEFAULT_FN_ATTRS
+static  __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_rcp14_ps(__m512 __A)
 {
   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
@@ -1706,7 +1707,7 @@
               (__mmask16) -1);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_rcp14_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
@@ -1714,7 +1715,7 @@
                    (__mmask16) __U);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_rcp14_ps (__mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_rcp14ps512_mask ((__v16sf) __A,
@@ -1723,7 +1724,7 @@
                    (__mmask16) __U);
 }
 
-static  __inline__ __m128 __DEFAULT_FN_ATTRS
+static  __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_rcp14_ss(__m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
@@ -1733,7 +1734,7 @@
                  (__mmask8) -1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_rcp14_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
@@ -1742,7 +1743,7 @@
           (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_rcp14_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
  return (__m128) __builtin_ia32_rcp14ss_mask ((__v4sf) __A,
@@ -1751,7 +1752,7 @@
           (__mmask8) __U);
 }
 
-static  __inline__ __m128d __DEFAULT_FN_ATTRS
+static  __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_rcp14_sd(__m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_rcp14sd_mask ((__v2df) __A,
@@ -1761,7 +1762,7 @@
             (__mmask8) -1);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_rcp14_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
@@ -1770,7 +1771,7 @@
           (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_rcp14_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
  return (__m128d) __builtin_ia32_rcp14sd_mask ( (__v2df) __A,
@@ -1779,7 +1780,7 @@
           (__mmask8) __U);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_floor_ps(__m512 __A)
 {
   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
@@ -1788,7 +1789,7 @@
                                                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_floor_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
@@ -1797,7 +1798,7 @@
                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_floor_pd(__m512d __A)
 {
   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
@@ -1806,7 +1807,7 @@
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_floor_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
@@ -1815,7 +1816,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_ceil_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
@@ -1824,7 +1825,7 @@
                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_ceil_ps(__m512 __A)
 {
   return (__m512) __builtin_ia32_rndscaleps_mask ((__v16sf) __A,
@@ -1833,7 +1834,7 @@
                                                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_ceil_pd(__m512d __A)
 {
   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
@@ -1842,7 +1843,7 @@
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_ceil_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_rndscalepd_mask ((__v8df) __A,
@@ -1851,13 +1852,13 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi64(__m512i __A)
 {
   return (__m512i)__builtin_ia32_pabsq512((__v8di)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_abs_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -1865,7 +1866,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_abs_epi64 (__mmask8 __U, __m512i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -1873,13 +1874,13 @@
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_abs_epi32(__m512i __A)
 {
   return (__m512i)__builtin_ia32_pabsd512((__v16si) __A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_abs_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512(__U,
@@ -1887,7 +1888,7 @@
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_abs_epi32 (__mmask16 __U, __m512i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512(__U,
@@ -1895,14 +1896,14 @@
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_add_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   __A = _mm_add_ss(__A, __B);
   __A[0] = (__U & 1) ? __A[0] : __W[0];
   return __A;
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_add_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   __A = _mm_add_ss(__A, __B);
   __A[0] = (__U & 1) ? __A[0] : 0;
@@ -1927,14 +1928,14 @@
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_add_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   __A = _mm_add_sd(__A, __B);
   __A[0] = (__U & 1) ? __A[0] : __W[0];
   return __A;
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_add_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   __A = _mm_add_sd(__A, __B);
   __A[0] = (__U & 1) ? __A[0] : 0;
@@ -1958,28 +1959,28 @@
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_add_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_add_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_add_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_add_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_add_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_add_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_add_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_add_ps(__A, __B),
@@ -2014,14 +2015,14 @@
                                   (__v16sf)_mm512_add_round_ps((A), (B), (R)), \
                                   (__v16sf)_mm512_setzero_ps());
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_sub_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   __A = _mm_sub_ss(__A, __B);
   __A[0] = (__U & 1) ? __A[0] : __W[0];
   return __A;
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_sub_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   __A = _mm_sub_ss(__A, __B);
   __A[0] = (__U & 1) ? __A[0] : 0;
@@ -2045,14 +2046,14 @@
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_sub_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   __A = _mm_sub_sd(__A, __B);
   __A[0] = (__U & 1) ? __A[0] : __W[0];
   return __A;
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_sub_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   __A = _mm_sub_sd(__A, __B);
   __A[0] = (__U & 1) ? __A[0] : 0;
@@ -2077,28 +2078,28 @@
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_sub_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_sub_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_sub_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_sub_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_sub_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_sub_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_sub_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_sub_ps(__A, __B),
@@ -2133,14 +2134,14 @@
                                   (__v16sf)_mm512_sub_round_ps((A), (B), (R)), \
                                   (__v16sf)_mm512_setzero_ps());
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_mul_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   __A = _mm_mul_ss(__A, __B);
   __A[0] = (__U & 1) ? __A[0] : __W[0];
   return __A;
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_mul_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   __A = _mm_mul_ss(__A, __B);
   __A[0] = (__U & 1) ? __A[0] : 0;
@@ -2164,14 +2165,14 @@
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_mul_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   __A = _mm_mul_sd(__A, __B);
   __A[0] = (__U & 1) ? __A[0] : __W[0];
   return __A;
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_mul_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   __A = _mm_mul_sd(__A, __B);
   __A[0] = (__U & 1) ? __A[0] : 0;
@@ -2196,28 +2197,28 @@
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_mul_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_mul_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_mul_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_mul_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_mul_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_mul_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_mul_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_mul_ps(__A, __B),
@@ -2252,7 +2253,7 @@
                                   (__v16sf)_mm512_mul_round_ps((A), (B), (R)), \
                                   (__v16sf)_mm512_setzero_ps());
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_div_ss(__m128 __W, __mmask8 __U,__m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
@@ -2261,7 +2262,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_div_ss(__mmask8 __U,__m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_divss_round_mask ((__v4sf) __A,
                 (__v4sf) __B,
@@ -2288,7 +2289,7 @@
                                           (__v4sf)_mm_setzero_ps(), \
                                           (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_div_sd(__m128d __W, __mmask8 __U,__m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
@@ -2297,7 +2298,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_div_sd(__mmask8 __U,__m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_divsd_round_mask ((__v2df) __A,
                 (__v2df) __B,
@@ -2324,40 +2325,40 @@
                                            (__v2df)_mm_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_div_pd(__m512d __a, __m512d __b)
 {
   return (__m512d)((__v8df)__a/(__v8df)__b);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_div_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_div_pd(__A, __B),
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_div_pd(__mmask8 __U, __m512d __A, __m512d __B) {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
                                               (__v8df)_mm512_div_pd(__A, __B),
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_div_ps(__m512 __a, __m512 __b)
 {
   return (__m512)((__v16sf)__a/(__v16sf)__b);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_div_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_div_ps(__A, __B),
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_div_ps(__mmask16 __U, __m512 __A, __m512 __B) {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
                                              (__v16sf)_mm512_div_ps(__A, __B),
@@ -2540,7 +2541,7 @@
                                             (__mmask8)(U), (int)(R))
 
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_fmadd_pd(__m512d __A, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
@@ -2550,7 +2551,7 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_fmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
@@ -2560,7 +2561,7 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask3_fmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 ((__v8df) __A,
@@ -2570,7 +2571,7 @@
                                                      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_fmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
@@ -2580,7 +2581,7 @@
                                                      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_fmsub_pd(__m512d __A, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
@@ -2590,7 +2591,7 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_fmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
@@ -2600,7 +2601,7 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_fmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_maskz ((__v8df) __A,
@@ -2610,7 +2611,7 @@
                                                      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
@@ -2620,7 +2621,7 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask3_fnmadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_mask3 (-(__v8df) __A,
@@ -2630,7 +2631,7 @@
                                                      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_fnmadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
@@ -2640,7 +2641,7 @@
                                                      _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
@@ -2650,7 +2651,7 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_fnmsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_maskz (-(__v8df) __A,
@@ -2744,7 +2745,7 @@
                                            (__mmask16)(U), (int)(R))
 
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_fmadd_ps(__m512 __A, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
@@ -2754,7 +2755,7 @@
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_fmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
@@ -2764,7 +2765,7 @@
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask3_fmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
 {
   return (__m512) __builtin_ia32_vfmaddps512_mask3 ((__v16sf) __A,
@@ -2774,7 +2775,7 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_fmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
@@ -2784,7 +2785,7 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_fmsub_ps(__m512 __A, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
@@ -2794,7 +2795,7 @@
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_fmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
@@ -2804,7 +2805,7 @@
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_fmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddps512_maskz ((__v16sf) __A,
@@ -2814,7 +2815,7 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
@@ -2824,7 +2825,7 @@
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask3_fnmadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
 {
   return (__m512) __builtin_ia32_vfmaddps512_mask3 (-(__v16sf) __A,
@@ -2834,7 +2835,7 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_fnmadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
@@ -2844,7 +2845,7 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
@@ -2854,7 +2855,7 @@
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_fnmsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddps512_maskz (-(__v16sf) __A,
@@ -2913,7 +2914,7 @@
                                                (__mmask8)(U), (int)(R))
 
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
@@ -2923,7 +2924,7 @@
                                                       _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_fmaddsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
@@ -2933,7 +2934,7 @@
                                                       _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask3_fmaddsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
 {
   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask3 ((__v8df) __A,
@@ -2943,7 +2944,7 @@
                                                        _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_fmaddsub_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
@@ -2953,7 +2954,7 @@
                                                        _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
@@ -2963,7 +2964,7 @@
                                                        _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_fmsubadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddsubpd512_mask ((__v8df) __A,
@@ -2973,7 +2974,7 @@
                                                        _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_fmsubadd_pd(__mmask8 __U, __m512d __A, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddsubpd512_maskz ((__v8df) __A,
@@ -3032,7 +3033,7 @@
                                               (__mmask16)(U), (int)(R))
 
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
@@ -3042,7 +3043,7 @@
                                                       _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_fmaddsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
@@ -3052,7 +3053,7 @@
                                                       _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask3_fmaddsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
 {
   return (__m512) __builtin_ia32_vfmaddsubps512_mask3 ((__v16sf) __A,
@@ -3062,7 +3063,7 @@
                                                        _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_fmaddsub_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
@@ -3072,7 +3073,7 @@
                                                        _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
@@ -3082,7 +3083,7 @@
                                                       _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_fmsubadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddsubps512_mask ((__v16sf) __A,
@@ -3092,7 +3093,7 @@
                                                       _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_fmsubadd_ps(__mmask16 __U, __m512 __A, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddsubps512_maskz ((__v16sf) __A,
@@ -3109,7 +3110,7 @@
                                             (__mmask8)(U), (int)(R))
 
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask3_fmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
 {
   return (__m512d)__builtin_ia32_vfmsubpd512_mask3 ((__v8df) __A,
@@ -3125,7 +3126,7 @@
                                            (__v16sf)(__m512)(C), \
                                            (__mmask16)(U), (int)(R))
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask3_fmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
 {
   return (__m512)__builtin_ia32_vfmsubps512_mask3 ((__v16sf) __A,
@@ -3142,7 +3143,7 @@
                                                (__mmask8)(U), (int)(R))
 
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask3_fmsubadd_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
 {
   return (__m512d)__builtin_ia32_vfmsubaddpd512_mask3 ((__v8df) __A,
@@ -3159,7 +3160,7 @@
                                               (__mmask16)(U), (int)(R))
 
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask3_fmsubadd_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
 {
   return (__m512)__builtin_ia32_vfmsubaddps512_mask3 ((__v16sf) __A,
@@ -3176,7 +3177,7 @@
                                            (__mmask8)(U), (int)(R))
 
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_fnmadd_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
@@ -3193,7 +3194,7 @@
                                           (__mmask16)(U), (int)(R))
 
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_fnmadd_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
@@ -3217,7 +3218,7 @@
                                             (__mmask8)(U), (int)(R))
 
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_fnmsub_pd(__m512d __A, __mmask8 __U, __m512d __B, __m512d __C)
 {
   return (__m512d) __builtin_ia32_vfmaddpd512_mask ((__v8df) __A,
@@ -3227,7 +3228,7 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask3_fnmsub_pd(__m512d __A, __m512d __B, __m512d __C, __mmask8 __U)
 {
   return (__m512d) __builtin_ia32_vfmsubpd512_mask3 (-(__v8df) __A,
@@ -3251,7 +3252,7 @@
                                            (__mmask16)(U), (int)(R))
 
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_fnmsub_ps(__m512 __A, __mmask16 __U, __m512 __B, __m512 __C)
 {
   return (__m512) __builtin_ia32_vfmaddps512_mask ((__v16sf) __A,
@@ -3261,7 +3262,7 @@
                                                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask3_fnmsub_ps(__m512 __A, __m512 __B, __m512 __C, __mmask16 __U)
 {
   return (__m512) __builtin_ia32_vfmsubps512_mask3 (-(__v16sf) __A,
@@ -3275,14 +3276,14 @@
 
 /* Vector permutations */
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_permutex2var_epi32(__m512i __A, __m512i __I, __m512i __B)
 {
   return (__m512i)__builtin_ia32_vpermi2vard512((__v16si)__A, (__v16si) __I,
                                                 (__v16si) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_permutex2var_epi32(__m512i __A, __mmask16 __U, __m512i __I,
                                __m512i __B)
 {
@@ -3291,7 +3292,7 @@
                               (__v16si)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask2_permutex2var_epi32(__m512i __A, __m512i __I, __mmask16 __U,
                                 __m512i __B)
 {
@@ -3300,7 +3301,7 @@
                               (__v16si)__I);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_permutex2var_epi32(__mmask16 __U, __m512i __A, __m512i __I,
                                 __m512i __B)
 {
@@ -3309,14 +3310,14 @@
                               (__v16si)_mm512_setzero_si512());
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_permutex2var_epi64(__m512i __A, __m512i __I, __m512i __B)
 {
   return (__m512i)__builtin_ia32_vpermi2varq512((__v8di)__A, (__v8di) __I,
                                                 (__v8di) __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_permutex2var_epi64(__m512i __A, __mmask8 __U, __m512i __I,
                                __m512i __B)
 {
@@ -3325,7 +3326,7 @@
                                (__v8di)__A);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask2_permutex2var_epi64(__m512i __A, __m512i __I, __mmask8 __U,
                                 __m512i __B)
 {
@@ -3334,7 +3335,7 @@
                                (__v8di)__I);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_permutex2var_epi64(__mmask8 __U, __m512i __A, __m512i __I,
                                 __m512i __B)
 {
@@ -3404,7 +3405,7 @@
 
 /* Vector Blend */
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_blend_pd(__mmask8 __U, __m512d __A, __m512d __W)
 {
   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
@@ -3412,7 +3413,7 @@
                  (__v8df) __A);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_blend_ps(__mmask16 __U, __m512 __A, __m512 __W)
 {
   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
@@ -3420,7 +3421,7 @@
                 (__v16sf) __A);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_blend_epi64(__mmask8 __U, __m512i __A, __m512i __W)
 {
   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
@@ -3428,7 +3429,7 @@
                 (__v8di) __A);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_blend_epi32(__mmask16 __U, __m512i __A, __m512i __W)
 {
   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
@@ -3566,7 +3567,7 @@
                                              (__mmask16)(U), (int)(R))
 
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvttps_epu32(__m512 __A)
 {
   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
@@ -3576,7 +3577,7 @@
                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvttps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
 {
   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
@@ -3585,7 +3586,7 @@
                    _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvttps_epu32 (__mmask16 __U, __m512 __A)
 {
   return (__m512i) __builtin_ia32_cvttps2udq512_mask ((__v16sf) __A,
@@ -3624,13 +3625,13 @@
                                            (__v16sf)_mm512_setzero_ps(), \
                                            (__mmask16)(U), (int)(R))
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_cvtepu32_ps (__m512i __A)
 {
   return (__m512)__builtin_convertvector((__v16su)__A, __v16sf);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepu32_ps (__m512 __W, __mmask16 __U, __m512i __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -3638,7 +3639,7 @@
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepu32_ps (__mmask16 __U, __m512i __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -3646,13 +3647,13 @@
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_cvtepi32_pd(__m256i __A)
 {
   return (__m512d)__builtin_convertvector((__v8si)__A, __v8df);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi32_pd (__m512d __W, __mmask8 __U, __m256i __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
@@ -3660,7 +3661,7 @@
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi32_pd (__mmask8 __U, __m256i __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
@@ -3668,25 +3669,25 @@
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_cvtepi32lo_pd(__m512i __A)
 {
   return (__m512d) _mm512_cvtepi32_pd(_mm512_castsi512_si256(__A));
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
 {
   return (__m512d) _mm512_mask_cvtepi32_pd(__W, __U, _mm512_castsi512_si256(__A));
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_cvtepi32_ps (__m512i __A)
 {
   return (__m512)__builtin_convertvector((__v16si)__A, __v16sf);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi32_ps (__m512 __W, __mmask16 __U, __m512i __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -3694,7 +3695,7 @@
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi32_ps (__mmask16 __U, __m512i __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -3702,13 +3703,13 @@
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_cvtepu32_pd(__m256i __A)
 {
   return (__m512d)__builtin_convertvector((__v8su)__A, __v8df);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepu32_pd (__m512d __W, __mmask8 __U, __m256i __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
@@ -3716,7 +3717,7 @@
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepu32_pd (__mmask8 __U, __m256i __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
@@ -3724,13 +3725,13 @@
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_cvtepu32lo_pd(__m512i __A)
 {
   return (__m512d) _mm512_cvtepu32_pd(_mm512_castsi512_si256(__A));
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepu32lo_pd(__m512d __W, __mmask8 __U,__m512i __A)
 {
   return (__m512d) _mm512_mask_cvtepu32_pd(__W, __U, _mm512_castsi512_si256(__A));
@@ -3751,7 +3752,7 @@
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U), (int)(R))
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS512
 _mm512_cvtpd_ps (__m512d __A)
 {
   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3760,7 +3761,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtpd_ps (__m256 __W, __mmask8 __U, __m512d __A)
 {
   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3769,7 +3770,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtpd_ps (__mmask8 __U, __m512d __A)
 {
   return (__m256) __builtin_ia32_cvtpd2ps512_mask ((__v8df) __A,
@@ -3778,7 +3779,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_cvtpd_pslo (__m512d __A)
 {
   return (__m512) __builtin_shufflevector((__v8sf) _mm512_cvtpd_ps(__A),
@@ -3786,7 +3787,7 @@
                 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtpd_pslo (__m512 __W, __mmask8 __U,__m512d __A)
 {
   return (__m512) __builtin_shufflevector (
@@ -3842,7 +3843,7 @@
                                            (__mmask16)(U), (int)(R))
 
 
-static  __inline __m512 __DEFAULT_FN_ATTRS
+static  __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_cvtph_ps(__m256i __A)
 {
   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
@@ -3852,7 +3853,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtph_ps (__m512 __W, __mmask16 __U, __m256i __A)
 {
   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
@@ -3861,7 +3862,7 @@
                  _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtph_ps (__mmask16 __U, __m256i __A)
 {
   return (__m512) __builtin_ia32_vcvtph2ps512_mask ((__v16hi) __A,
@@ -3885,7 +3886,7 @@
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R))
 
-static __inline __m256i __DEFAULT_FN_ATTRS
+static __inline __m256i __DEFAULT_FN_ATTRS512
 _mm512_cvttpd_epi32(__m512d __a)
 {
   return (__m256i)__builtin_ia32_cvttpd2dq512_mask((__v8df) __a,
@@ -3894,7 +3895,7 @@
                                                     _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvttpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
 {
   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
@@ -3903,7 +3904,7 @@
                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvttpd_epi32 (__mmask8 __U, __m512d __A)
 {
   return (__m256i) __builtin_ia32_cvttpd2dq512_mask ((__v8df) __A,
@@ -3927,7 +3928,7 @@
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R))
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvttps_epi32(__m512 __a)
 {
   return (__m512i)
@@ -3936,7 +3937,7 @@
                                      (__mmask16) -1, _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvttps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
 {
   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
@@ -3945,7 +3946,7 @@
                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvttps_epi32 (__mmask16 __U, __m512 __A)
 {
   return (__m512i) __builtin_ia32_cvttps2dq512_mask ((__v16sf) __A,
@@ -3969,7 +3970,7 @@
                                            (__v16si)_mm512_setzero_si512(), \
                                            (__mmask16)(U), (int)(R))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtps_epi32 (__m512 __A)
 {
   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
@@ -3978,7 +3979,7 @@
                  _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtps_epi32 (__m512i __W, __mmask16 __U, __m512 __A)
 {
   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
@@ -3987,7 +3988,7 @@
                  _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtps_epi32 (__mmask16 __U, __m512 __A)
 {
   return (__m512i) __builtin_ia32_cvtps2dq512_mask ((__v16sf) __A,
@@ -4012,7 +4013,7 @@
                                            (__v8si)_mm256_setzero_si256(), \
                                            (__mmask8)(U), (int)(R))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_cvtpd_epi32 (__m512d __A)
 {
   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
@@ -4022,7 +4023,7 @@
                  _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtpd_epi32 (__m256i __W, __mmask8 __U, __m512d __A)
 {
   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
@@ -4031,7 +4032,7 @@
                  _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtpd_epi32 (__mmask8 __U, __m512d __A)
 {
   return (__m256i) __builtin_ia32_cvtpd2dq512_mask ((__v8df) __A,
@@ -4056,7 +4057,7 @@
                                             (__v16si)_mm512_setzero_si512(), \
                                             (__mmask16)(U), (int)(R))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtps_epu32 ( __m512 __A)
 {
   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,\
@@ -4066,7 +4067,7 @@
                   _MM_FROUND_CUR_DIRECTION);\
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtps_epu32 (__m512i __W, __mmask16 __U, __m512 __A)
 {
   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
@@ -4075,7 +4076,7 @@
                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtps_epu32 ( __mmask16 __U, __m512 __A)
 {
   return (__m512i) __builtin_ia32_cvtps2udq512_mask ((__v16sf) __A,
@@ -4100,7 +4101,7 @@
                                             (__v8si)_mm256_setzero_si256(), \
                                             (__mmask8)(U), (int)(R))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_cvtpd_epu32 (__m512d __A)
 {
   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
@@ -4110,7 +4111,7 @@
                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
 {
   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
@@ -4119,7 +4120,7 @@
                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtpd_epu32 (__mmask8 __U, __m512d __A)
 {
   return (__m256i) __builtin_ia32_cvtpd2udq512_mask ((__v8df) __A,
@@ -4129,13 +4130,13 @@
                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ double __DEFAULT_FN_ATTRS
+static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_cvtsd_f64(__m512d __a)
 {
   return __a[0];
 }
 
-static __inline__ float __DEFAULT_FN_ATTRS
+static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_cvtss_f32(__m512 __a)
 {
   return __a[0];
@@ -4143,14 +4144,14 @@
 
 /* Unpack and Interleave */
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_unpackhi_pd(__m512d __a, __m512d __b)
 {
   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_unpackhi_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
@@ -4158,7 +4159,7 @@
                                            (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_unpackhi_pd(__mmask8 __U, __m512d __A, __m512d __B)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
@@ -4166,14 +4167,14 @@
                                            (__v8df)_mm512_setzero_pd());
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_unpacklo_pd(__m512d __a, __m512d __b)
 {
   return (__m512d)__builtin_shufflevector((__v8df)__a, (__v8df)__b,
                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_unpacklo_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
@@ -4181,7 +4182,7 @@
                                            (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_unpacklo_pd (__mmask8 __U, __m512d __A, __m512d __B)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8) __U,
@@ -4189,7 +4190,7 @@
                                            (__v8df)_mm512_setzero_pd());
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_unpackhi_ps(__m512 __a, __m512 __b)
 {
   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
@@ -4199,7 +4200,7 @@
                                          2+12, 18+12, 3+12, 19+12);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_unpackhi_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
@@ -4207,7 +4208,7 @@
                                           (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_unpackhi_ps (__mmask16 __U, __m512 __A, __m512 __B)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
@@ -4215,7 +4216,7 @@
                                           (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_unpacklo_ps(__m512 __a, __m512 __b)
 {
   return (__m512)__builtin_shufflevector((__v16sf)__a, (__v16sf)__b,
@@ -4225,7 +4226,7 @@
                                          0+12, 16+12, 1+12, 17+12);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_unpacklo_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
@@ -4233,7 +4234,7 @@
                                           (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_unpacklo_ps (__mmask16 __U, __m512 __A, __m512 __B)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16) __U,
@@ -4241,7 +4242,7 @@
                                           (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_unpackhi_epi32(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
@@ -4251,7 +4252,7 @@
                                           2+12, 18+12, 3+12, 19+12);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_unpackhi_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
@@ -4259,7 +4260,7 @@
                                        (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_unpackhi_epi32(__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
@@ -4267,7 +4268,7 @@
                                        (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_unpacklo_epi32(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_shufflevector((__v16si)__A, (__v16si)__B,
@@ -4277,7 +4278,7 @@
                                           0+12, 16+12, 1+12, 17+12);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_unpacklo_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
@@ -4285,7 +4286,7 @@
                                        (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_unpacklo_epi32(__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16) __U,
@@ -4293,14 +4294,14 @@
                                        (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_unpackhi_epi64(__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                           1, 9, 1+2, 9+2, 1+4, 9+4, 1+6, 9+6);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_unpackhi_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
@@ -4308,7 +4309,7 @@
                                         (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_unpackhi_epi64(__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
@@ -4316,14 +4317,14 @@
                                         (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_unpacklo_epi64 (__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_shufflevector((__v8di)__A, (__v8di)__B,
                                           0, 8, 0+2, 8+2, 0+4, 8+4, 0+6, 8+6);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_unpacklo_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
@@ -4331,7 +4332,7 @@
                                         (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_unpacklo_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8) __U,
@@ -4342,7 +4343,7 @@
 
 /* SIMD load ops */
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_loadu_si512 (void const *__P)
 {
   struct __loadu_si512 {
@@ -4351,7 +4352,7 @@
   return ((struct __loadu_si512*)__P)->__v;
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_loadu_epi32 (__m512i __W, __mmask16 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *) __P,
@@ -4360,7 +4361,7 @@
 }
 
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_loadu_epi32(__mmask16 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_loaddqusi512_mask ((const int *)__P,
@@ -4369,7 +4370,7 @@
                                                      (__mmask16) __U);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_loadu_epi64 (__m512i __W, __mmask8 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *) __P,
@@ -4377,7 +4378,7 @@
                   (__mmask8) __U);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_loadu_epi64(__mmask8 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_loaddqudi512_mask ((const long long *)__P,
@@ -4386,7 +4387,7 @@
                                                      (__mmask8) __U);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_loadu_ps (__m512 __W, __mmask16 __U, void const *__P)
 {
   return (__m512) __builtin_ia32_loadups512_mask ((const float *) __P,
@@ -4394,7 +4395,7 @@
                    (__mmask16) __U);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_loadu_ps(__mmask16 __U, void const *__P)
 {
   return (__m512) __builtin_ia32_loadups512_mask ((const float *)__P,
@@ -4403,7 +4404,7 @@
                                                   (__mmask16) __U);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_loadu_pd (__m512d __W, __mmask8 __U, void const *__P)
 {
   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *) __P,
@@ -4411,7 +4412,7 @@
                 (__mmask8) __U);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_loadu_pd(__mmask8 __U, void const *__P)
 {
   return (__m512d) __builtin_ia32_loadupd512_mask ((const double *)__P,
@@ -4420,7 +4421,7 @@
                                                    (__mmask8) __U);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_loadu_pd(void const *__p)
 {
   struct __loadu_pd {
@@ -4429,7 +4430,7 @@
   return ((struct __loadu_pd*)__p)->__v;
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_loadu_ps(void const *__p)
 {
   struct __loadu_ps {
@@ -4438,13 +4439,13 @@
   return ((struct __loadu_ps*)__p)->__v;
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_load_ps(void const *__p)
 {
   return *(__m512*)__p;
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_load_ps (__m512 __W, __mmask16 __U, void const *__P)
 {
   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *) __P,
@@ -4452,7 +4453,7 @@
                    (__mmask16) __U);
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_load_ps(__mmask16 __U, void const *__P)
 {
   return (__m512) __builtin_ia32_loadaps512_mask ((const __v16sf *)__P,
@@ -4461,13 +4462,13 @@
                                                   (__mmask16) __U);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_load_pd(void const *__p)
 {
   return *(__m512d*)__p;
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_load_pd (__m512d __W, __mmask8 __U, void const *__P)
 {
   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *) __P,
@@ -4475,7 +4476,7 @@
                           (__mmask8) __U);
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_load_pd(__mmask8 __U, void const *__P)
 {
   return (__m512d) __builtin_ia32_loadapd512_mask ((const __v8df *)__P,
@@ -4484,19 +4485,19 @@
                                                    (__mmask8) __U);
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_load_si512 (void const *__P)
 {
   return *(__m512i *) __P;
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_load_epi32 (void const *__P)
 {
   return *(__m512i *) __P;
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_load_epi64 (void const *__P)
 {
   return *(__m512i *) __P;
@@ -4504,14 +4505,14 @@
 
 /* SIMD store ops */
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_mask_storeu_epi64(void *__P, __mmask8 __U, __m512i __A)
 {
   __builtin_ia32_storedqudi512_mask ((long long *)__P, (__v8di) __A,
                                      (__mmask8) __U);
 }
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_storeu_si512 (void *__P, __m512i __A)
 {
   struct __storeu_si512 {
@@ -4520,20 +4521,20 @@
   ((struct __storeu_si512*)__P)->__v = __A;
 }
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_mask_storeu_epi32(void *__P, __mmask16 __U, __m512i __A)
 {
   __builtin_ia32_storedqusi512_mask ((int *)__P, (__v16si) __A,
                                      (__mmask16) __U);
 }
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_mask_storeu_pd(void *__P, __mmask8 __U, __m512d __A)
 {
   __builtin_ia32_storeupd512_mask ((double *)__P, (__v8df) __A, (__mmask8) __U);
 }
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_storeu_pd(void *__P, __m512d __A)
 {
   struct __storeu_pd {
@@ -4542,14 +4543,14 @@
   ((struct __storeu_pd*)__P)->__v = __A;
 }
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_mask_storeu_ps(void *__P, __mmask16 __U, __m512 __A)
 {
   __builtin_ia32_storeups512_mask ((float *)__P, (__v16sf) __A,
                                    (__mmask16) __U);
 }
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_storeu_ps(void *__P, __m512 __A)
 {
   struct __storeu_ps {
@@ -4558,44 +4559,44 @@
   ((struct __storeu_ps*)__P)->__v = __A;
 }
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_mask_store_pd(void *__P, __mmask8 __U, __m512d __A)
 {
   __builtin_ia32_storeapd512_mask ((__v8df *)__P, (__v8df) __A, (__mmask8) __U);
 }
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_store_pd(void *__P, __m512d __A)
 {
   *(__m512d*)__P = __A;
 }
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_mask_store_ps(void *__P, __mmask16 __U, __m512 __A)
 {
   __builtin_ia32_storeaps512_mask ((__v16sf *)__P, (__v16sf) __A,
                                    (__mmask16) __U);
 }
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_store_ps(void *__P, __m512 __A)
 {
   *(__m512*)__P = __A;
 }
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_store_si512 (void *__P, __m512i __A)
 {
   *(__m512i *) __P = __A;
 }
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_store_epi32 (void *__P, __m512i __A)
 {
   *(__m512i *) __P = __A;
 }
 
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS512
 _mm512_store_epi64 (void *__P, __m512i __A)
 {
   *(__m512i *) __P = __A;
@@ -4603,7 +4604,7 @@
 
 /* Mask ops */
 
-static __inline __mmask16 __DEFAULT_FN_ATTRS
+static __inline __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_knot(__mmask16 __M)
 {
   return __builtin_ia32_knothi(__M);
@@ -4711,7 +4712,7 @@
 #define _mm512_mask_cmpneq_epu64_mask(k, A, B) \
     _mm512_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtepi8_epi32(__m128i __A)
 {
   /* This function always performs a signed extension, but __v16qi is a char
@@ -4719,7 +4720,7 @@
   return (__m512i)__builtin_convertvector((__v16qs)__A, __v16si);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -4727,7 +4728,7 @@
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi8_epi32(__mmask16 __U, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -4735,7 +4736,7 @@
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtepi8_epi64(__m128i __A)
 {
   /* This function always performs a signed extension, but __v16qi is a char
@@ -4743,7 +4744,7 @@
   return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qs)__A, (__v16qs)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -4751,7 +4752,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -4759,13 +4760,13 @@
                                              (__v8di)_mm512_setzero_si512 ());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtepi32_epi64(__m256i __X)
 {
   return (__m512i)__builtin_convertvector((__v8si)__X, __v8di);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -4773,7 +4774,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi32_epi64(__mmask8 __U, __m256i __X)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -4781,13 +4782,13 @@
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtepi16_epi32(__m256i __A)
 {
   return (__m512i)__builtin_convertvector((__v16hi)__A, __v16si);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -4795,7 +4796,7 @@
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi16_epi32(__mmask16 __U, __m256i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -4803,13 +4804,13 @@
                                             (__v16si)_mm512_setzero_si512 ());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtepi16_epi64(__m128i __A)
 {
   return (__m512i)__builtin_convertvector((__v8hi)__A, __v8di);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -4817,7 +4818,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -4825,13 +4826,13 @@
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtepu8_epi32(__m128i __A)
 {
   return (__m512i)__builtin_convertvector((__v16qu)__A, __v16si);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepu8_epi32(__m512i __W, __mmask16 __U, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -4839,7 +4840,7 @@
                                              (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepu8_epi32(__mmask16 __U, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -4847,13 +4848,13 @@
                                              (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtepu8_epi64(__m128i __A)
 {
   return (__m512i)__builtin_convertvector(__builtin_shufflevector((__v16qu)__A, (__v16qu)__A, 0, 1, 2, 3, 4, 5, 6, 7), __v8di);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepu8_epi64(__m512i __W, __mmask8 __U, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -4861,7 +4862,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -4869,13 +4870,13 @@
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtepu32_epi64(__m256i __X)
 {
   return (__m512i)__builtin_convertvector((__v8su)__X, __v8di);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepu32_epi64(__m512i __W, __mmask8 __U, __m256i __X)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -4883,7 +4884,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepu32_epi64(__mmask8 __U, __m256i __X)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -4891,13 +4892,13 @@
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtepu16_epi32(__m256i __A)
 {
   return (__m512i)__builtin_convertvector((__v16hu)__A, __v16si);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepu16_epi32(__m512i __W, __mmask16 __U, __m256i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -4905,7 +4906,7 @@
                                             (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepu16_epi32(__mmask16 __U, __m256i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -4913,13 +4914,13 @@
                                             (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_cvtepu16_epi64(__m128i __A)
 {
   return (__m512i)__builtin_convertvector((__v8hu)__A, __v8di);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepu16_epi64(__m512i __W, __mmask8 __U, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -4927,7 +4928,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -4935,13 +4936,13 @@
                                              (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_rorv_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_prorvd512((__v16si)__A, (__v16si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_rorv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512(__U,
@@ -4949,7 +4950,7 @@
                                            (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_rorv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512(__U,
@@ -4957,13 +4958,13 @@
                                            (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_rorv_epi64 (__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_prorvq512((__v8di)__A, (__v8di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_rorv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512(__U,
@@ -4971,7 +4972,7 @@
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_rorv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512(__U,
@@ -5047,13 +5048,13 @@
                                       (__v8di)_mm512_rol_epi64((a), (b)), \
                                       (__v8di)_mm512_setzero_si512())
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_rolv_epi32 (__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_prolvd512((__v16si)__A, (__v16si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_rolv_epi32 (__m512i __W, __mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512(__U,
@@ -5061,7 +5062,7 @@
                                            (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_rolv_epi32 (__mmask16 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512(__U,
@@ -5069,13 +5070,13 @@
                                            (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_rolv_epi64 (__m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_prolvq512((__v8di)__A, (__v8di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_rolv_epi64 (__m512i __W, __mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512(__U,
@@ -5083,7 +5084,7 @@
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_rolv_epi64 (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512(__U,
@@ -5117,13 +5118,13 @@
                                       (__v8di)_mm512_ror_epi64((A), (B)), \
                                       (__v8di)_mm512_setzero_si512())
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_slli_epi32(__m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_pslldi512((__v16si)__A, __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_slli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5131,20 +5132,20 @@
                                          (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_slli_epi32(__mmask16 __U, __m512i __A, int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_slli_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_slli_epi64(__m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_psllqi512((__v8di)__A, __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_slli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5152,7 +5153,7 @@
                                           (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_slli_epi64(__mmask8 __U, __m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5160,13 +5161,13 @@
                                           (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_srli_epi32(__m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_psrldi512((__v16si)__A, __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_srli_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5174,20 +5175,20 @@
                                          (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_srli_epi32(__mmask16 __U, __m512i __A, int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
                                          (__v16si)_mm512_srli_epi32(__A, __B),
                                          (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_srli_epi64(__m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_psrlqi512((__v8di)__A, __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_srli_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5195,7 +5196,7 @@
                                           (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_srli_epi64(__mmask8 __U, __m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5203,7 +5204,7 @@
                                           (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_load_epi32 (__m512i __W, __mmask16 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
@@ -5211,7 +5212,7 @@
               (__mmask16) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_load_epi32 (__mmask16 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_movdqa32load512_mask ((const __v16si *) __P,
@@ -5220,14 +5221,14 @@
               (__mmask16) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_store_epi32 (void *__P, __mmask16 __U, __m512i __A)
 {
   __builtin_ia32_movdqa32store512_mask ((__v16si *) __P, (__v16si) __A,
           (__mmask16) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_mov_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
@@ -5235,7 +5236,7 @@
                  (__v16si) __W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_mov_epi32 (__mmask16 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_selectd_512 ((__mmask16) __U,
@@ -5243,7 +5244,7 @@
                  (__v16si) _mm512_setzero_si512 ());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_mov_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
@@ -5251,7 +5252,7 @@
                  (__v8di) __W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_mov_epi64 (__mmask8 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_selectq_512 ((__mmask8) __U,
@@ -5259,7 +5260,7 @@
                  (__v8di) _mm512_setzero_si512 ());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_load_epi64 (__m512i __W, __mmask8 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
@@ -5267,7 +5268,7 @@
               (__mmask8) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_load_epi64 (__mmask8 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_movdqa64load512_mask ((const __v8di *) __P,
@@ -5276,21 +5277,21 @@
               (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_store_epi64 (void *__P, __mmask8 __U, __m512i __A)
 {
   __builtin_ia32_movdqa64store512_mask ((__v8di *) __P, (__v8di) __A,
           (__mmask8) __U);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_movedup_pd (__m512d __A)
 {
   return (__m512d)__builtin_shufflevector((__v8df)__A, (__v8df)__A,
                                           0, 0, 2, 2, 4, 4, 6, 6);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_movedup_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -5298,7 +5299,7 @@
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_movedup_pd (__mmask8 __U, __m512d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -5471,14 +5472,14 @@
                                                  (__mmask8)-1, (int)(R))
 
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_getexp_sd (__m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_getexpsd128_round_mask ((__v2df) __A,
                  (__v2df) __B, (__v2df) _mm_setzero_pd(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_getexp_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
@@ -5494,7 +5495,7 @@
                                                  (__v2df)(__m128d)(W), \
                                                  (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_getexp_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
  return (__m128d) __builtin_ia32_getexpsd128_round_mask ( (__v2df) __A,
@@ -5516,14 +5517,14 @@
                                                 (__v4sf)_mm_setzero_ps(), \
                                                 (__mmask8)-1, (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_getexp_ss (__m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
                 (__v4sf) __B, (__v4sf)  _mm_setzero_ps(), (__mmask8) -1, _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_getexp_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
@@ -5539,7 +5540,7 @@
                                                 (__v4sf)(__m128)(W), \
                                                 (__mmask8)(U), (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_getexp_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
  return (__m128) __builtin_ia32_getexpss128_round_mask ((__v4sf) __A,
@@ -5645,7 +5646,7 @@
                                               (__v4sf)_mm_setzero_ps(), \
                                               (__mmask8)(U), (int)(R))
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_kmov (__mmask16 __A)
 {
   return  __A;
@@ -5664,13 +5665,13 @@
   (long long)__builtin_ia32_vcvtsd2si64((__v2df)(__m128d)(A), (int)(R))
 #endif
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_sll_epi32(__m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_pslld512((__v16si) __A, (__v4si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_sll_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5678,7 +5679,7 @@
                                           (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_sll_epi32(__mmask16 __U, __m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5686,13 +5687,13 @@
                                           (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_sll_epi64(__m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_psllq512((__v8di)__A, (__v2di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_sll_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5700,7 +5701,7 @@
                                              (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_sll_epi64(__mmask8 __U, __m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5708,13 +5709,13 @@
                                            (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_sllv_epi32(__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_psllv16si((__v16si)__X, (__v16si)__Y);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_sllv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5722,7 +5723,7 @@
                                            (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_sllv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5730,13 +5731,13 @@
                                            (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_sllv_epi64(__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_psllv8di((__v8di)__X, (__v8di)__Y);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_sllv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5744,7 +5745,7 @@
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_sllv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5752,13 +5753,13 @@
                                             (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_sra_epi32(__m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_psrad512((__v16si) __A, (__v4si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_sra_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5766,7 +5767,7 @@
                                           (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_sra_epi32(__mmask16 __U, __m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5774,13 +5775,13 @@
                                           (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_sra_epi64(__m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_psraq512((__v8di)__A, (__v2di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_sra_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5788,7 +5789,7 @@
                                            (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_sra_epi64(__mmask8 __U, __m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5796,13 +5797,13 @@
                                            (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_srav_epi32(__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_psrav16si((__v16si)__X, (__v16si)__Y);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_srav_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5810,7 +5811,7 @@
                                            (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_srav_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5818,13 +5819,13 @@
                                            (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_srav_epi64(__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_psrav8di((__v8di)__X, (__v8di)__Y);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_srav_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5832,7 +5833,7 @@
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_srav_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5840,13 +5841,13 @@
                                             (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_srl_epi32(__m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_psrld512((__v16si) __A, (__v4si)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_srl_epi32(__m512i __W, __mmask16 __U, __m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5854,7 +5855,7 @@
                                           (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_srl_epi32(__mmask16 __U, __m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5862,13 +5863,13 @@
                                           (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_srl_epi64(__m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_psrlq512((__v8di)__A, (__v2di)__B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_srl_epi64(__m512i __W, __mmask8 __U, __m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5876,7 +5877,7 @@
                                            (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_srl_epi64(__mmask8 __U, __m512i __A, __m128i __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5884,13 +5885,13 @@
                                            (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_srlv_epi32(__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_psrlv16si((__v16si)__X, (__v16si)__Y);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_srlv_epi32(__m512i __W, __mmask16 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5898,7 +5899,7 @@
                                            (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_srlv_epi32(__mmask16 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U,
@@ -5906,13 +5907,13 @@
                                            (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_srlv_epi64 (__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_psrlv8di((__v8di)__X, (__v8di)__Y);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_srlv_epi64(__m512i __W, __mmask8 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5920,7 +5921,7 @@
                                             (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_srlv_epi64(__mmask8 __U, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U,
@@ -5978,7 +5979,7 @@
 #define _mm_cvt_roundsd_u32(A, R) \
   (unsigned int)__builtin_ia32_vcvtsd2usi32((__v2df)(__m128d)(A), (int)(R))
 
-static __inline__ unsigned __DEFAULT_FN_ATTRS
+static __inline__ unsigned __DEFAULT_FN_ATTRS128
 _mm_cvtsd_u32 (__m128d __A)
 {
   return (unsigned) __builtin_ia32_vcvtsd2usi32 ((__v2df) __A,
@@ -5990,7 +5991,7 @@
   (unsigned long long)__builtin_ia32_vcvtsd2usi64((__v2df)(__m128d)(A), \
                                                   (int)(R))
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
 _mm_cvtsd_u64 (__m128d __A)
 {
   return (unsigned long long) __builtin_ia32_vcvtsd2usi64 ((__v2df)
@@ -6016,7 +6017,7 @@
 #define _mm_cvt_roundss_u32(A, R) \
   (unsigned int)__builtin_ia32_vcvtss2usi32((__v4sf)(__m128)(A), (int)(R))
 
-static __inline__ unsigned __DEFAULT_FN_ATTRS
+static __inline__ unsigned __DEFAULT_FN_ATTRS128
 _mm_cvtss_u32 (__m128 __A)
 {
   return (unsigned) __builtin_ia32_vcvtss2usi32 ((__v4sf) __A,
@@ -6028,7 +6029,7 @@
   (unsigned long long)__builtin_ia32_vcvtss2usi64((__v4sf)(__m128)(A), \
                                                   (int)(R))
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
 _mm_cvtss_u64 (__m128 __A)
 {
   return (unsigned long long) __builtin_ia32_vcvtss2usi64 ((__v4sf)
@@ -6043,7 +6044,7 @@
 #define _mm_cvtt_roundsd_si32(A, R) \
   (int)__builtin_ia32_vcvttsd2si32((__v2df)(__m128d)(A), (int)(R))
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS128
 _mm_cvttsd_i32 (__m128d __A)
 {
   return (int) __builtin_ia32_vcvttsd2si32 ((__v2df) __A,
@@ -6057,7 +6058,7 @@
 #define _mm_cvtt_roundsd_i64(A, R) \
   (long long)__builtin_ia32_vcvttsd2si64((__v2df)(__m128d)(A), (int)(R))
 
-static __inline__ long long __DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS128
 _mm_cvttsd_i64 (__m128d __A)
 {
   return (long long) __builtin_ia32_vcvttsd2si64 ((__v2df) __A,
@@ -6068,7 +6069,7 @@
 #define _mm_cvtt_roundsd_u32(A, R) \
   (unsigned int)__builtin_ia32_vcvttsd2usi32((__v2df)(__m128d)(A), (int)(R))
 
-static __inline__ unsigned __DEFAULT_FN_ATTRS
+static __inline__ unsigned __DEFAULT_FN_ATTRS128
 _mm_cvttsd_u32 (__m128d __A)
 {
   return (unsigned) __builtin_ia32_vcvttsd2usi32 ((__v2df) __A,
@@ -6080,7 +6081,7 @@
   (unsigned long long)__builtin_ia32_vcvttsd2usi64((__v2df)(__m128d)(A), \
                                                    (int)(R))
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
 _mm_cvttsd_u64 (__m128d __A)
 {
   return (unsigned long long) __builtin_ia32_vcvttsd2usi64 ((__v2df)
@@ -6095,7 +6096,7 @@
 #define _mm_cvtt_roundss_si32(A, R) \
   (int)__builtin_ia32_vcvttss2si32((__v4sf)(__m128)(A), (int)(R))
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS128
 _mm_cvttss_i32 (__m128 __A)
 {
   return (int) __builtin_ia32_vcvttss2si32 ((__v4sf) __A,
@@ -6109,7 +6110,7 @@
 #define _mm_cvtt_roundss_si64(A, R) \
   (long long)__builtin_ia32_vcvttss2si64((__v4sf)(__m128)(A), (int)(R))
 
-static __inline__ long long __DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS128
 _mm_cvttss_i64 (__m128 __A)
 {
   return (long long) __builtin_ia32_vcvttss2si64 ((__v4sf) __A,
@@ -6120,7 +6121,7 @@
 #define _mm_cvtt_roundss_u32(A, R) \
   (unsigned int)__builtin_ia32_vcvttss2usi32((__v4sf)(__m128)(A), (int)(R))
 
-static __inline__ unsigned __DEFAULT_FN_ATTRS
+static __inline__ unsigned __DEFAULT_FN_ATTRS128
 _mm_cvttss_u32 (__m128 __A)
 {
   return (unsigned) __builtin_ia32_vcvttss2usi32 ((__v4sf) __A,
@@ -6132,7 +6133,7 @@
   (unsigned long long)__builtin_ia32_vcvttss2usi64((__v4sf)(__m128)(A), \
                                                    (int)(R))
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS128
 _mm_cvttss_u64 (__m128 __A)
 {
   return (unsigned long long) __builtin_ia32_vcvttss2usi64 ((__v4sf)
@@ -6167,13 +6168,13 @@
                                       (__v16sf)_mm512_permute_ps((X), (C)), \
                                       (__v16sf)_mm512_setzero_ps())
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_permutevar_pd(__m512d __A, __m512i __C)
 {
   return (__m512d)__builtin_ia32_vpermilvarpd512((__v8df)__A, (__v8di)__C);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_permutevar_pd(__m512d __W, __mmask8 __U, __m512d __A, __m512i __C)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -6181,7 +6182,7 @@
                                          (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_permutevar_pd(__mmask8 __U, __m512d __A, __m512i __C)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -6189,13 +6190,13 @@
                                          (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_permutevar_ps(__m512 __A, __m512i __C)
 {
   return (__m512)__builtin_ia32_vpermilvarps512((__v16sf)__A, (__v16si)__C);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_permutevar_ps(__m512 __W, __mmask16 __U, __m512 __A, __m512i __C)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -6203,7 +6204,7 @@
                                         (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_permutevar_ps(__mmask16 __U, __m512 __A, __m512i __C)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -6211,14 +6212,14 @@
                                         (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline __m512d __DEFAULT_FN_ATTRS
+static __inline __m512d __DEFAULT_FN_ATTRS512
 _mm512_permutex2var_pd(__m512d __A, __m512i __I, __m512d __B)
 {
   return (__m512d)__builtin_ia32_vpermi2varpd512((__v8df)__A, (__v8di)__I,
                                                  (__v8df)__B);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_permutex2var_pd(__m512d __A, __mmask8 __U, __m512i __I, __m512d __B)
 {
   return (__m512d)__builtin_ia32_selectpd_512(__U,
@@ -6226,7 +6227,7 @@
                                   (__v8df)__A);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask2_permutex2var_pd(__m512d __A, __m512i __I, __mmask8 __U,
                              __m512d __B)
 {
@@ -6235,7 +6236,7 @@
                                   (__v8df)(__m512d)__I);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_permutex2var_pd(__mmask8 __U, __m512d __A, __m512i __I,
                              __m512d __B)
 {
@@ -6244,14 +6245,14 @@
                                   (__v8df)_mm512_setzero_pd());
 }
 
-static __inline __m512 __DEFAULT_FN_ATTRS
+static __inline __m512 __DEFAULT_FN_ATTRS512
 _mm512_permutex2var_ps(__m512 __A, __m512i __I, __m512 __B)
 {
   return (__m512)__builtin_ia32_vpermi2varps512((__v16sf)__A, (__v16si)__I,
                                                 (__v16sf) __B);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_permutex2var_ps(__m512 __A, __mmask16 __U, __m512i __I, __m512 __B)
 {
   return (__m512)__builtin_ia32_selectps_512(__U,
@@ -6259,7 +6260,7 @@
                                  (__v16sf)__A);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask2_permutex2var_ps(__m512 __A, __m512i __I, __mmask16 __U, __m512 __B)
 {
   return (__m512)__builtin_ia32_selectps_512(__U,
@@ -6267,7 +6268,7 @@
                                  (__v16sf)(__m512)__I);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_permutex2var_ps(__mmask16 __U, __m512 __A, __m512i __I, __m512 __B)
 {
   return (__m512)__builtin_ia32_selectps_512(__U,
@@ -6291,7 +6292,7 @@
                                              (__v8si)_mm256_setzero_si256(), \
                                              (__mmask8)(U), (int)(R))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_cvttpd_epu32 (__m512d __A)
 {
   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
@@ -6301,7 +6302,7 @@
                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvttpd_epu32 (__m256i __W, __mmask8 __U, __m512d __A)
 {
   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
@@ -6310,7 +6311,7 @@
                   _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvttpd_epu32 (__mmask8 __U, __m512d __A)
 {
   return (__m256i) __builtin_ia32_cvttpd2udq512_mask ((__v8df) __A,
@@ -6422,7 +6423,7 @@
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_scalef_pd (__m512d __A, __m512d __B)
 {
   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
@@ -6433,7 +6434,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_scalef_pd (__m512d __W, __mmask8 __U, __m512d __A, __m512d __B)
 {
   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
@@ -6443,7 +6444,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_scalef_pd (__mmask8 __U, __m512d __A, __m512d __B)
 {
   return (__m512d) __builtin_ia32_scalefpd512_mask ((__v8df) __A,
@@ -6472,7 +6473,7 @@
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R))
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_scalef_ps (__m512 __A, __m512 __B)
 {
   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
@@ -6483,7 +6484,7 @@
                _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_scalef_ps (__m512 __W, __mmask16 __U, __m512 __A, __m512 __B)
 {
   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
@@ -6493,7 +6494,7 @@
                _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_scalef_ps (__mmask16 __U, __m512 __A, __m512 __B)
 {
   return (__m512) __builtin_ia32_scalefps512_mask ((__v16sf) __A,
@@ -6510,7 +6511,7 @@
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)-1, (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_scalef_sd (__m128d __A, __m128d __B)
 {
   return (__m128d) __builtin_ia32_scalefsd_round_mask ((__v2df) __A,
@@ -6519,7 +6520,7 @@
               _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_scalef_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
@@ -6535,7 +6536,7 @@
                                               (__v2df)(__m128d)(W), \
                                               (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_scalef_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
  return (__m128d) __builtin_ia32_scalefsd_round_mask ( (__v2df) __A,
@@ -6557,7 +6558,7 @@
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)-1, (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_scalef_ss (__m128 __A, __m128 __B)
 {
   return (__m128) __builtin_ia32_scalefss_round_mask ((__v4sf) __A,
@@ -6566,7 +6567,7 @@
              _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_scalef_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
@@ -6582,7 +6583,7 @@
                                              (__v4sf)(__m128)(W), \
                                              (__mmask8)(U), (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_scalef_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
  return (__m128) __builtin_ia32_scalefss_round_mask ( (__v4sf) __A,
@@ -6599,13 +6600,13 @@
                                              (__mmask8)(U), \
                                              _MM_FROUND_CUR_DIRECTION)
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_srai_epi32(__m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_psradi512((__v16si)__A, __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_srai_epi32(__m512i __W, __mmask16 __U, __m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
@@ -6613,20 +6614,20 @@
                                          (__v16si)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_srai_epi32(__mmask16 __U, __m512i __A, int __B) {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__U, \
                                          (__v16si)_mm512_srai_epi32(__A, __B), \
                                          (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_srai_epi64(__m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_psraqi512((__v8di)__A, __B);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_srai_epi64(__m512i __W, __mmask8 __U, __m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
@@ -6634,7 +6635,7 @@
                                           (__v8di)__W);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_srai_epi64(__mmask8 __U, __m512i __A, int __B)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__U, \
@@ -6756,7 +6757,7 @@
                                             (__v2df)_mm_setzero_pd(), \
                                             (__mmask8)-1, (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_sqrt_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
@@ -6772,7 +6773,7 @@
                                             (__v2df)(__m128d)(W), \
                                             (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_sqrt_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
  return (__m128d) __builtin_ia32_sqrtsd_round_mask ( (__v2df) __A,
@@ -6794,7 +6795,7 @@
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)-1, (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_sqrt_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
@@ -6810,7 +6811,7 @@
                                            (__v4sf)(__m128)(W), (__mmask8)(U), \
                                            (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_sqrt_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
  return (__m128) __builtin_ia32_sqrtss_round_mask ( (__v4sf) __A,
@@ -6826,7 +6827,7 @@
                                            (__v4sf)_mm_setzero_ps(), \
                                            (__mmask8)(U), (int)(R))
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_broadcast_f32x4(__m128 __A)
 {
   return (__m512)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
@@ -6834,7 +6835,7 @@
                                          0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcast_f32x4(__m512 __O, __mmask16 __M, __m128 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@@ -6842,7 +6843,7 @@
                                            (__v16sf)__O);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcast_f32x4(__mmask16 __M, __m128 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__M,
@@ -6850,14 +6851,14 @@
                                            (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_broadcast_f64x4(__m256d __A)
 {
   return (__m512d)__builtin_shufflevector((__v4df)__A, (__v4df)__A,
                                           0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcast_f64x4(__m512d __O, __mmask8 __M, __m256d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
@@ -6865,7 +6866,7 @@
                                             (__v8df)__O);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcast_f64x4(__mmask8 __M, __m256d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__M,
@@ -6873,7 +6874,7 @@
                                             (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_broadcast_i32x4(__m128i __A)
 {
   return (__m512i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
@@ -6881,7 +6882,7 @@
                                           0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcast_i32x4(__m512i __O, __mmask16 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -6889,7 +6890,7 @@
                                            (__v16si)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcast_i32x4(__mmask16 __M, __m128i __A)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -6897,14 +6898,14 @@
                                            (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_broadcast_i64x4(__m256i __A)
 {
   return (__m512i)__builtin_shufflevector((__v4di)__A, (__v4di)__A,
                                           0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcast_i64x4(__m512i __O, __mmask8 __M, __m256i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -6912,7 +6913,7 @@
                                             (__v8di)__O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcast_i64x4(__mmask8 __M, __m256i __A)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -6920,7 +6921,7 @@
                                             (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcastsd_pd (__m512d __O, __mmask8 __M, __m128d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512(__M,
@@ -6928,7 +6929,7 @@
                                               (__v8df) __O);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512(__M,
@@ -6936,7 +6937,7 @@
                                               (__v8df) _mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_broadcastss_ps (__m512 __O, __mmask16 __M, __m128 __A)
 {
   return (__m512)__builtin_ia32_selectps_512(__M,
@@ -6944,7 +6945,7 @@
                                              (__v16sf) __O);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_broadcastss_ps (__mmask16 __M, __m128 __A)
 {
   return (__m512)__builtin_ia32_selectps_512(__M,
@@ -6952,7 +6953,7 @@
                                              (__v16sf) _mm512_setzero_ps());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_cvtsepi32_epi8 (__m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
@@ -6960,14 +6961,14 @@
                (__mmask16) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtsepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
                (__v16qi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtsepi32_epi8 (__mmask16 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdb512_mask ((__v16si) __A,
@@ -6975,13 +6976,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
 {
   __builtin_ia32_pmovsdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_cvtsepi32_epi16 (__m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
@@ -6989,14 +6990,14 @@
                (__mmask16) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtsepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
                (__v16hi) __O, __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtsepi32_epi16 (__mmask16 __M, __m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovsdw512_mask ((__v16si) __A,
@@ -7004,13 +7005,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtsepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
 {
   __builtin_ia32_pmovsdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_cvtsepi64_epi8 (__m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
@@ -7018,14 +7019,14 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
                (__v16qi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtsepi64_epi8 (__mmask8 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqb512_mask ((__v8di) __A,
@@ -7033,13 +7034,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
 {
   __builtin_ia32_pmovsqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_cvtsepi64_epi32 (__m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
@@ -7047,14 +7048,14 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtsepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
                (__v8si) __O, __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtsepi64_epi32 (__mmask8 __M, __m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovsqd512_mask ((__v8di) __A,
@@ -7062,13 +7063,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtsepi64_storeu_epi32 (void *__P, __mmask8 __M, __m512i __A)
 {
   __builtin_ia32_pmovsqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_cvtsepi64_epi16 (__m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
@@ -7076,14 +7077,14 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
                (__v8hi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtsepi64_epi16 (__mmask8 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqw512_mask ((__v8di) __A,
@@ -7091,13 +7092,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m512i __A)
 {
   __builtin_ia32_pmovsqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_cvtusepi32_epi8 (__m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
@@ -7105,7 +7106,7 @@
                 (__mmask16) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtusepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
@@ -7113,7 +7114,7 @@
                 __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtusepi32_epi8 (__mmask16 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdb512_mask ((__v16si) __A,
@@ -7121,13 +7122,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
 {
   __builtin_ia32_pmovusdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_cvtusepi32_epi16 (__m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
@@ -7135,7 +7136,7 @@
                 (__mmask16) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtusepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
@@ -7143,7 +7144,7 @@
                 __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtusepi32_epi16 (__mmask16 __M, __m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovusdw512_mask ((__v16si) __A,
@@ -7151,13 +7152,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtusepi32_storeu_epi16 (void *__P, __mmask16 __M, __m512i __A)
 {
   __builtin_ia32_pmovusdw512mem_mask ((__v16hi*) __P, (__v16si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_cvtusepi64_epi8 (__m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
@@ -7165,7 +7166,7 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
@@ -7173,7 +7174,7 @@
                 __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtusepi64_epi8 (__mmask8 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqb512_mask ((__v8di) __A,
@@ -7181,13 +7182,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
 {
   __builtin_ia32_pmovusqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_cvtusepi64_epi32 (__m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
@@ -7195,14 +7196,14 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtusepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
                 (__v8si) __O, __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtusepi64_epi32 (__mmask8 __M, __m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovusqd512_mask ((__v8di) __A,
@@ -7210,13 +7211,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtusepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
 {
   __builtin_ia32_pmovusqd512mem_mask ((__v8si*) __P, (__v8di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_cvtusepi64_epi16 (__m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
@@ -7224,14 +7225,14 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
                 (__v8hi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtusepi64_epi16 (__mmask8 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqw512_mask ((__v8di) __A,
@@ -7239,13 +7240,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtusepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 {
   __builtin_ia32_pmovusqw512mem_mask ((__v8hi*) __P, (__v8di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_cvtepi32_epi8 (__m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
@@ -7253,14 +7254,14 @@
               (__mmask16) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi32_epi8 (__m128i __O, __mmask16 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
               (__v16qi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi32_epi8 (__mmask16 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovdb512_mask ((__v16si) __A,
@@ -7268,13 +7269,13 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi32_storeu_epi8 (void * __P, __mmask16 __M, __m512i __A)
 {
   __builtin_ia32_pmovdb512mem_mask ((__v16qi *) __P, (__v16si) __A, __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_cvtepi32_epi16 (__m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
@@ -7282,14 +7283,14 @@
               (__mmask16) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi32_epi16 (__m256i __O, __mmask16 __M, __m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
               (__v16hi) __O, __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi32_epi16 (__mmask16 __M, __m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovdw512_mask ((__v16si) __A,
@@ -7297,13 +7298,13 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi32_storeu_epi16 (void * __P, __mmask16 __M, __m512i __A)
 {
   __builtin_ia32_pmovdw512mem_mask ((__v16hi *) __P, (__v16si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_cvtepi64_epi8 (__m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
@@ -7311,14 +7312,14 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
               (__v16qi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi64_epi8 (__mmask8 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovqb512_mask ((__v8di) __A,
@@ -7326,13 +7327,13 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m512i __A)
 {
   __builtin_ia32_pmovqb512mem_mask ((__v16qi *) __P, (__v8di) __A, __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_cvtepi64_epi32 (__m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
@@ -7340,14 +7341,14 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi64_epi32 (__m256i __O, __mmask8 __M, __m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
               (__v8si) __O, __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi64_epi32 (__mmask8 __M, __m512i __A)
 {
   return (__m256i) __builtin_ia32_pmovqd512_mask ((__v8di) __A,
@@ -7355,13 +7356,13 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi64_storeu_epi32 (void* __P, __mmask8 __M, __m512i __A)
 {
   __builtin_ia32_pmovqd512mem_mask ((__v8si *) __P, (__v8di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_cvtepi64_epi16 (__m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
@@ -7369,14 +7370,14 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
               (__v8hi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtepi64_epi16 (__mmask8 __M, __m512i __A)
 {
   return (__m128i) __builtin_ia32_pmovqw512_mask ((__v8di) __A,
@@ -7384,7 +7385,7 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtepi64_storeu_epi16 (void *__P, __mmask8 __M, __m512i __A)
 {
   __builtin_ia32_pmovqw512mem_mask ((__v8hi *) __P, (__v8di) __A, __M);
@@ -7569,7 +7570,7 @@
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_getexp_pd (__m512d __A)
 {
   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
@@ -7578,7 +7579,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_getexp_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
@@ -7587,7 +7588,7 @@
                 _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_getexp_pd (__mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_getexppd512_mask ((__v8df) __A,
@@ -7611,7 +7612,7 @@
                                           (__v16sf)_mm512_setzero_ps(), \
                                           (__mmask16)(U), (int)(R))
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_getexp_ps (__m512 __A)
 {
   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
@@ -7620,7 +7621,7 @@
                _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_getexp_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
@@ -7629,7 +7630,7 @@
                _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_getexp_ps (__mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_getexpps512_mask ((__v16sf) __A,
@@ -7814,7 +7815,7 @@
                                (__v8si)(__m256i)(index), \
                                (__v8di)(__m512i)(v1), (int)(scale))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_fmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
  __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
@@ -7830,7 +7831,7 @@
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_fmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 {
  __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
@@ -7846,7 +7847,7 @@
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask3_fmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
 {
  __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
@@ -7862,7 +7863,7 @@
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_fmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
  __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
@@ -7878,7 +7879,7 @@
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_fmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 {
  __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
@@ -7894,7 +7895,7 @@
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask3_fmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
 {
  __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
@@ -7910,7 +7911,7 @@
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_fnmadd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
  __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
@@ -7926,7 +7927,7 @@
                                         (__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_fnmadd_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 {
  __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
@@ -7942,7 +7943,7 @@
                                          (__v4sf)(__m128)(C), (__mmask8)(U), \
                                          (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask3_fnmadd_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
 {
  __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
@@ -7958,7 +7959,7 @@
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_fnmsub_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
  __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
@@ -7974,7 +7975,7 @@
                                         -(__v4sf)(__m128)(B), (__mmask8)(U), \
                                         (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_fnmsub_ss (__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 {
  __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __A,
@@ -7990,7 +7991,7 @@
                                          -(__v4sf)(__m128)(C), (__mmask8)(U), \
                                          _MM_FROUND_CUR_DIRECTION)
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask3_fnmsub_ss (__m128 __W, __m128 __X, __m128 __Y, __mmask8 __U)
 {
  __m128 __Z = __builtin_ia32_vfmaddss3((__v4sf) __W,
@@ -8006,7 +8007,7 @@
                                          (__v4sf)(__m128)(Y), (__mmask8)(U), \
                                          (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_fmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
  __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
@@ -8022,7 +8023,7 @@
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_fmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 {
  __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
@@ -8038,7 +8039,7 @@
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION)
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask3_fmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
 {
  __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
@@ -8054,7 +8055,7 @@
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_fmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
  __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
@@ -8070,7 +8071,7 @@
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_fmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 {
  __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
@@ -8086,7 +8087,7 @@
                                           -(__v2df)(__m128d)(C), \
                                           (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask3_fmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
 {
  __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
@@ -8102,7 +8103,7 @@
                                           (__v2df)(__m128d)(Y), \
                                           (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_fnmadd_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
  __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
@@ -8118,7 +8119,7 @@
                                          (__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_fnmadd_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 {
  __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
@@ -8134,7 +8135,7 @@
                                           (__v2df)(__m128d)(C), (__mmask8)(U), \
                                           (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask3_fnmadd_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
 {
  __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
@@ -8150,7 +8151,7 @@
                                           (__v2df)(__m128d)(Y), (__mmask8)(U), \
                                           (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_fnmsub_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
  __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
@@ -8166,7 +8167,7 @@
                                          -(__v2df)(__m128d)(B), (__mmask8)(U), \
                                          (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_fnmsub_sd (__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 {
  __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __A,
@@ -8183,7 +8184,7 @@
                                           (__mmask8)(U), \
                                           _MM_FROUND_CUR_DIRECTION)
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask3_fnmsub_sd (__m128d __W, __m128d __X, __m128d __Y, __mmask8 __U)
 {
  __m128d __Z = __builtin_ia32_vfmaddsd3((__v2df) __W,
@@ -8225,13 +8226,13 @@
                                       (__v8di)_mm512_permutex_epi64((X), (C)), \
                                       (__v8di)_mm512_setzero_si512())
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_permutexvar_pd (__m512i __X, __m512d __Y)
 {
   return (__m512d)__builtin_ia32_permvardf512((__v8df) __Y, (__v8di) __X);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_permutexvar_pd (__m512d __W, __mmask8 __U, __m512i __X, __m512d __Y)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -8239,7 +8240,7 @@
                                         (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_permutexvar_pd (__mmask8 __U, __m512i __X, __m512d __Y)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -8247,13 +8248,13 @@
                                         (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_permutexvar_epi64 (__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_permvardi512((__v8di)__Y, (__v8di)__X);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_permutexvar_epi64 (__mmask8 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectq_512((__mmask8)__M,
@@ -8261,7 +8262,7 @@
                                      (__v8di)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_permutexvar_epi64 (__m512i __W, __mmask8 __M, __m512i __X,
              __m512i __Y)
 {
@@ -8270,13 +8271,13 @@
                                      (__v8di)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_permutexvar_ps (__m512i __X, __m512 __Y)
 {
   return (__m512)__builtin_ia32_permvarsf512((__v16sf)__Y, (__v16si)__X);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_permutexvar_ps (__m512 __W, __mmask16 __U, __m512i __X, __m512 __Y)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -8284,7 +8285,7 @@
                                        (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_permutexvar_ps (__mmask16 __U, __m512i __X, __m512 __Y)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -8292,7 +8293,7 @@
                                        (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_permutexvar_epi32 (__m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_permvarsi512((__v16si)__Y, (__v16si)__X);
@@ -8300,7 +8301,7 @@
 
 #define _mm512_permutevar_epi32 _mm512_permutexvar_epi32
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_permutexvar_epi32 (__mmask16 __M, __m512i __X, __m512i __Y)
 {
   return (__m512i)__builtin_ia32_selectd_512((__mmask16)__M,
@@ -8308,7 +8309,7 @@
                                     (__v16si)_mm512_setzero_si512());
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_permutexvar_epi32 (__m512i __W, __mmask16 __M, __m512i __X,
              __m512i __Y)
 {
@@ -8319,83 +8320,83 @@
 
 #define _mm512_mask_permutevar_epi32 _mm512_mask_permutexvar_epi32
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_kand (__mmask16 __A, __mmask16 __B)
 {
   return (__mmask16) __builtin_ia32_kandhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_kandn (__mmask16 __A, __mmask16 __B)
 {
   return (__mmask16) __builtin_ia32_kandnhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_kor (__mmask16 __A, __mmask16 __B)
 {
   return (__mmask16) __builtin_ia32_korhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_kortestc (__mmask16 __A, __mmask16 __B)
 {
   return __builtin_ia32_kortestchi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_kortestz (__mmask16 __A, __mmask16 __B)
 {
   return __builtin_ia32_kortestzhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_kunpackb (__mmask16 __A, __mmask16 __B)
 {
   return (__mmask16) __builtin_ia32_kunpckhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_kxnor (__mmask16 __A, __mmask16 __B)
 {
   return (__mmask16) __builtin_ia32_kxnorhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_kxor (__mmask16 __A, __mmask16 __B)
 {
   return (__mmask16) __builtin_ia32_kxorhi ((__mmask16) __A, (__mmask16) __B);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_stream_si512 (__m512i * __P, __m512i __A)
 {
   typedef __v8di __v8di_aligned __attribute__((aligned(64)));
   __builtin_nontemporal_store((__v8di_aligned)__A, (__v8di_aligned*)__P);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_stream_load_si512 (void const *__P)
 {
   typedef __v8di __v8di_aligned __attribute__((aligned(64)));
   return (__m512i) __builtin_nontemporal_load((const __v8di_aligned *)__P);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_stream_pd (double *__P, __m512d __A)
 {
   typedef __v8df __v8df_aligned __attribute__((aligned(64)));
   __builtin_nontemporal_store((__v8df_aligned)__A, (__v8df_aligned*)__P);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_stream_ps (float *__P, __m512 __A)
 {
   typedef __v16sf __v16sf_aligned __attribute__((aligned(64)));
   __builtin_nontemporal_store((__v16sf_aligned)__A, (__v16sf_aligned*)__P);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_compress_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
@@ -8403,7 +8404,7 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_compress_pd (__mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_compressdf512_mask ((__v8df) __A,
@@ -8412,7 +8413,7 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_compress_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
@@ -8420,7 +8421,7 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_compress_epi64 (__mmask8 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_compressdi512_mask ((__v8di) __A,
@@ -8429,7 +8430,7 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_compress_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
@@ -8437,7 +8438,7 @@
                  (__mmask16) __U);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_compress_ps (__mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_compresssf512_mask ((__v16sf) __A,
@@ -8446,7 +8447,7 @@
                  (__mmask16) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_compress_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
@@ -8454,7 +8455,7 @@
                   (__mmask16) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_compress_epi32 (__mmask16 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_compresssi512_mask ((__v16si) __A,
@@ -8509,70 +8510,70 @@
 
 /* Bit Test */
 
-static __inline __mmask16 __DEFAULT_FN_ATTRS
+static __inline __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_test_epi32_mask (__m512i __A, __m512i __B)
 {
   return _mm512_cmpneq_epi32_mask (_mm512_and_epi32(__A, __B),
                                    _mm512_setzero_si512());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_mask_test_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
 {
   return _mm512_mask_cmpneq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
                                         _mm512_setzero_si512());
 }
 
-static __inline __mmask8 __DEFAULT_FN_ATTRS
+static __inline __mmask8 __DEFAULT_FN_ATTRS512
 _mm512_test_epi64_mask (__m512i __A, __m512i __B)
 {
   return _mm512_cmpneq_epi64_mask (_mm512_and_epi32 (__A, __B),
                                    _mm512_setzero_si512());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
 _mm512_mask_test_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return _mm512_mask_cmpneq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
                                         _mm512_setzero_si512());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_testn_epi32_mask (__m512i __A, __m512i __B)
 {
   return _mm512_cmpeq_epi32_mask (_mm512_and_epi32 (__A, __B),
                                   _mm512_setzero_si512());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS512
 _mm512_mask_testn_epi32_mask (__mmask16 __U, __m512i __A, __m512i __B)
 {
   return _mm512_mask_cmpeq_epi32_mask (__U, _mm512_and_epi32 (__A, __B),
                                        _mm512_setzero_si512());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
 _mm512_testn_epi64_mask (__m512i __A, __m512i __B)
 {
   return _mm512_cmpeq_epi64_mask (_mm512_and_epi32 (__A, __B),
                                   _mm512_setzero_si512());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS512
 _mm512_mask_testn_epi64_mask (__mmask8 __U, __m512i __A, __m512i __B)
 {
   return _mm512_mask_cmpeq_epi64_mask (__U, _mm512_and_epi32 (__A, __B),
                                        _mm512_setzero_si512());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_movehdup_ps (__m512 __A)
 {
   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                          1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_movehdup_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -8580,7 +8581,7 @@
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_movehdup_ps (__mmask16 __U, __m512 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -8588,14 +8589,14 @@
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_moveldup_ps (__m512 __A)
 {
   return (__m512)__builtin_shufflevector((__v16sf)__A, (__v16sf)__A,
                          0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_moveldup_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -8603,7 +8604,7 @@
                                              (__v16sf)__W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_moveldup_ps (__mmask16 __U, __m512 __A)
 {
   return (__m512)__builtin_ia32_selectps_512((__mmask16)__U,
@@ -8611,7 +8612,7 @@
                                              (__v16sf)_mm512_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_move_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   __m128 res = __A;
@@ -8619,7 +8620,7 @@
   return res;
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_move_ss (__mmask8 __U, __m128 __A, __m128 __B)
 {
   __m128 res = __A;
@@ -8627,7 +8628,7 @@
   return res;
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_move_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
   __m128d res = __A;
@@ -8635,7 +8636,7 @@
   return res;
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_move_sd (__mmask8 __U, __m128d __A, __m128d __B)
 {
   __m128d res = __A;
@@ -8643,19 +8644,19 @@
   return res;
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_store_ss (float * __W, __mmask8 __U, __m128 __A)
 {
   __builtin_ia32_storess128_mask ((__v4sf *)__W, __A, __U & 1);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_store_sd (double * __W, __mmask8 __U, __m128d __A)
 {
   __builtin_ia32_storesd128_mask ((__v2df *)__W, __A, __U & 1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_load_ss (__m128 __W, __mmask8 __U, const float* __A)
 {
   __m128 src = (__v4sf) __builtin_shufflevector((__v4sf) __W,
@@ -8665,7 +8666,7 @@
   return (__m128) __builtin_ia32_loadss128_mask ((__v4sf *) __A, src, __U & 1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_load_ss (__mmask8 __U, const float* __A)
 {
   return (__m128)__builtin_ia32_loadss128_mask ((__v4sf *) __A,
@@ -8673,7 +8674,7 @@
                                                 __U & 1);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_load_sd (__m128d __W, __mmask8 __U, const double* __A)
 {
   __m128d src = (__v2df) __builtin_shufflevector((__v2df) __W,
@@ -8683,7 +8684,7 @@
   return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A, src, __U & 1);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_load_sd (__mmask8 __U, const double* __A)
 {
   return (__m128d) __builtin_ia32_loadsd128_mask ((__v2df *) __A,
@@ -8704,7 +8705,7 @@
                                       (__v16si)_mm512_shuffle_epi32((A), (I)), \
                                       (__v16si)_mm512_setzero_si512())
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_expand_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
@@ -8712,7 +8713,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_expand_pd (__mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_expanddf512_mask ((__v8df) __A,
@@ -8720,7 +8721,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_expand_epi64 (__m512i __W, __mmask8 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
@@ -8728,7 +8729,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_expand_epi64 ( __mmask8 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_expanddi512_mask ((__v8di) __A,
@@ -8736,7 +8737,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_expandloadu_pd(__m512d __W, __mmask8 __U, void const *__P)
 {
   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
@@ -8744,7 +8745,7 @@
               (__mmask8) __U);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_expandloadu_pd(__mmask8 __U, void const *__P)
 {
   return (__m512d) __builtin_ia32_expandloaddf512_mask ((const __v8df *)__P,
@@ -8752,7 +8753,7 @@
               (__mmask8) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_expandloadu_epi64(__m512i __W, __mmask8 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
@@ -8760,7 +8761,7 @@
               (__mmask8) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_expandloadu_epi64(__mmask8 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_expandloaddi512_mask ((const __v8di *)__P,
@@ -8768,7 +8769,7 @@
               (__mmask8) __U);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_expandloadu_ps(__m512 __W, __mmask16 __U, void const *__P)
 {
   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
@@ -8776,7 +8777,7 @@
                    (__mmask16) __U);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_expandloadu_ps(__mmask16 __U, void const *__P)
 {
   return (__m512) __builtin_ia32_expandloadsf512_mask ((const __v16sf *)__P,
@@ -8784,7 +8785,7 @@
                    (__mmask16) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_expandloadu_epi32(__m512i __W, __mmask16 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
@@ -8792,7 +8793,7 @@
               (__mmask16) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_expandloadu_epi32(__mmask16 __U, void const *__P)
 {
   return (__m512i) __builtin_ia32_expandloadsi512_mask ((const __v16si *)__P,
@@ -8800,7 +8801,7 @@
               (__mmask16) __U);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_expand_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
@@ -8808,7 +8809,7 @@
                (__mmask16) __U);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_expand_ps (__mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_expandsf512_mask ((__v16sf) __A,
@@ -8816,7 +8817,7 @@
                (__mmask16) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_expand_epi32 (__m512i __W, __mmask16 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
@@ -8824,7 +8825,7 @@
                 (__mmask16) __U);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_maskz_expand_epi32 (__mmask16 __U, __m512i __A)
 {
   return (__m512i) __builtin_ia32_expandsi512_mask ((__v16si) __A,
@@ -8847,13 +8848,13 @@
                                            (__v8df)_mm512_setzero_pd(), \
                                            (__mmask8)(U), (int)(R))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_cvtps_pd (__m256 __A)
 {
   return (__m512d) __builtin_convertvector((__v8sf)__A, __v8df);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtps_pd (__m512d __W, __mmask8 __U, __m256 __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -8861,7 +8862,7 @@
                                               (__v8df)__W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_cvtps_pd (__mmask8 __U, __m256 __A)
 {
   return (__m512d)__builtin_ia32_selectpd_512((__mmask8)__U,
@@ -8869,19 +8870,19 @@
                                               (__v8df)_mm512_setzero_pd());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_cvtpslo_pd (__m512 __A)
 {
   return (__m512) _mm512_cvtps_pd(_mm512_castps512_ps256(__A));
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_cvtpslo_pd (__m512d __W, __mmask8 __U, __m512 __A)
 {
   return (__m512) _mm512_mask_cvtps_pd(__W, __U, _mm512_castps512_ps256(__A));
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_mov_pd (__m512d __W, __mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
@@ -8889,7 +8890,7 @@
               (__v8df) __W);
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_maskz_mov_pd (__mmask8 __U, __m512d __A)
 {
   return (__m512d) __builtin_ia32_selectpd_512 ((__mmask8) __U,
@@ -8897,7 +8898,7 @@
               (__v8df) _mm512_setzero_pd ());
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_mov_ps (__m512 __W, __mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
@@ -8905,7 +8906,7 @@
              (__v16sf) __W);
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_maskz_mov_ps (__mmask16 __U, __m512 __A)
 {
   return (__m512) __builtin_ia32_selectps_512 ((__mmask16) __U,
@@ -8913,28 +8914,28 @@
              (__v16sf) _mm512_setzero_ps ());
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m512d __A)
 {
   __builtin_ia32_compressstoredf512_mask ((__v8df *) __P, (__v8df) __A,
             (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m512i __A)
 {
   __builtin_ia32_compressstoredi512_mask ((__v8di *) __P, (__v8di) __A,
             (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_compressstoreu_ps (void *__P, __mmask16 __U, __m512 __A)
 {
   __builtin_ia32_compressstoresf512_mask ((__v16sf *) __P, (__v16sf) __A,
             (__mmask16) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS512
 _mm512_mask_compressstoreu_epi32 (void *__P, __mmask16 __U, __m512i __A)
 {
   __builtin_ia32_compressstoresi512_mask ((__v16si *) __P, (__v16si) __A,
@@ -8959,7 +8960,7 @@
                                              (__v4sf)_mm_setzero_ps(), \
                                              (__mmask8)(U), (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsd_ss (__m128 __W, __mmask8 __U, __m128 __A, __m128d __B)
 {
   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
@@ -8968,7 +8969,7 @@
                                              (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtsd_ss (__mmask8 __U, __m128 __A, __m128d __B)
 {
   return __builtin_ia32_cvtsd2ss_round_mask ((__v4sf)__A,
@@ -9032,7 +9033,7 @@
                                               (__v2df)_mm_setzero_pd(), \
                                               (__mmask8)(U), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_cvtss_sd (__m128d __W, __mmask8 __U, __m128d __A, __m128 __B)
 {
   return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
@@ -9041,7 +9042,7 @@
                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtss_sd (__mmask8 __U, __m128d __A, __m128 __B)
 {
   return __builtin_ia32_cvtss2sd_round_mask((__v2df)__A,
@@ -9050,7 +9051,7 @@
                                             (__mmask8)__U, _MM_FROUND_CUR_DIRECTION);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_cvtu32_sd (__m128d __A, unsigned __B)
 {
   __A[0] = __B;
@@ -9062,7 +9063,7 @@
   (__m128d)__builtin_ia32_cvtusi2sd64((__v2df)(__m128d)(A), \
                                       (unsigned long long)(B), (int)(R))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_cvtu64_sd (__m128d __A, unsigned long long __B)
 {
   __A[0] = __B;
@@ -9074,7 +9075,7 @@
   (__m128)__builtin_ia32_cvtusi2ss32((__v4sf)(__m128)(A), (unsigned int)(B), \
                                      (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_cvtu32_ss (__m128 __A, unsigned __B)
 {
   __A[0] = __B;
@@ -9086,7 +9087,7 @@
   (__m128)__builtin_ia32_cvtusi2ss64((__v4sf)(__m128)(A), \
                                      (unsigned long long)(B), (int)(R))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_cvtu64_ss (__m128 __A, unsigned long long __B)
 {
   __A[0] = __B;
@@ -9094,7 +9095,7 @@
 }
 #endif
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_set1_epi32 (__m512i __O, __mmask16 __M, int __A)
 {
   return (__m512i) __builtin_ia32_selectd_512(__M,
@@ -9102,7 +9103,7 @@
                                               (__v16si) __O);
 }
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_mask_set1_epi64 (__m512i __O, __mmask8 __M, long long __A)
 {
   return (__m512i) __builtin_ia32_selectq_512(__M,
@@ -9110,7 +9111,7 @@
                                               (__v8di) __O);
 }
 
-static  __inline __m512i __DEFAULT_FN_ATTRS
+static  __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_set_epi8 (char __e63, char __e62, char __e61, char __e60, char __e59,
     char __e58, char __e57, char __e56, char __e55, char __e54, char __e53,
     char __e52, char __e51, char __e50, char __e49, char __e48, char __e47,
@@ -9134,7 +9135,7 @@
      __e56, __e57, __e58, __e59, __e60, __e61, __e62, __e63};
 }
 
-static  __inline __m512i __DEFAULT_FN_ATTRS
+static  __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_set_epi16(short __e31, short __e30, short __e29, short __e28,
     short __e27, short __e26, short __e25, short __e24, short __e23,
     short __e22, short __e21, short __e20, short __e19, short __e18,
@@ -9149,7 +9150,7 @@
      __e24, __e25, __e26, __e27, __e28, __e29, __e30, __e31 };
 }
 
-static __inline __m512i __DEFAULT_FN_ATTRS
+static __inline __m512i __DEFAULT_FN_ATTRS512
 _mm512_set_epi32 (int __A, int __B, int __C, int __D,
      int __E, int __F, int __G, int __H,
      int __I, int __J, int __K, int __L,
@@ -9165,7 +9166,7 @@
   _mm512_set_epi32((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6), \
                    (e5),(e4),(e3),(e2),(e1),(e0))
 
-static __inline__ __m512i __DEFAULT_FN_ATTRS
+static __inline__ __m512i __DEFAULT_FN_ATTRS512
 _mm512_set_epi64 (long long __A, long long __B, long long __C,
      long long __D, long long __E, long long __F,
      long long __G, long long __H)
@@ -9177,7 +9178,7 @@
 #define _mm512_setr_epi64(e0,e1,e2,e3,e4,e5,e6,e7)           \
   _mm512_set_epi64((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_set_pd (double __A, double __B, double __C, double __D,
         double __E, double __F, double __G, double __H)
 {
@@ -9188,7 +9189,7 @@
 #define _mm512_setr_pd(e0,e1,e2,e3,e4,e5,e6,e7)              \
   _mm512_set_pd((e7),(e6),(e5),(e4),(e3),(e2),(e1),(e0))
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_set_ps (float __A, float __B, float __C, float __D,
         float __E, float __F, float __G, float __H,
         float __I, float __J, float __K, float __L,
@@ -9203,25 +9204,25 @@
   _mm512_set_ps((e15),(e14),(e13),(e12),(e11),(e10),(e9),(e8),(e7),(e6),(e5), \
                 (e4),(e3),(e2),(e1),(e0))
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_abs_ps(__m512 __A)
 {
   return (__m512)_mm512_and_epi32(_mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
 }
 
-static __inline__ __m512 __DEFAULT_FN_ATTRS
+static __inline__ __m512 __DEFAULT_FN_ATTRS512
 _mm512_mask_abs_ps(__m512 __W, __mmask16 __K, __m512 __A)
 {
   return (__m512)_mm512_mask_and_epi32((__m512i)__W, __K, _mm512_set1_epi32(0x7FFFFFFF),(__m512i)__A) ;
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_abs_pd(__m512d __A)
 {
   return (__m512d)_mm512_and_epi64(_mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A) ;
 }
 
-static __inline__ __m512d __DEFAULT_FN_ATTRS
+static __inline__ __m512d __DEFAULT_FN_ATTRS512
 _mm512_mask_abs_pd(__m512d __W, __mmask8 __K, __m512d __A)
 {
   return (__m512d)_mm512_mask_and_epi64((__v8di)__W, __K, _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF),(__v8di)__A);
@@ -9248,41 +9249,41 @@
   __v2du __t8 = __t6 op __t7; \
   return __t8[0];
 
-static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_add_epi64(__m512i __W) {
+static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_add_epi64(__m512i __W) {
   _mm512_mask_reduce_operator(+);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_mul_epi64(__m512i __W) {
+static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_epi64(__m512i __W) {
   _mm512_mask_reduce_operator(*);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_and_epi64(__m512i __W) {
+static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_and_epi64(__m512i __W) {
   _mm512_mask_reduce_operator(&);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS _mm512_reduce_or_epi64(__m512i __W) {
+static __inline__ long long __DEFAULT_FN_ATTRS512 _mm512_reduce_or_epi64(__m512i __W) {
   _mm512_mask_reduce_operator(|);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_add_epi64(__mmask8 __M, __m512i __W) {
   __W = _mm512_maskz_mov_epi64(__M, __W);
   _mm512_mask_reduce_operator(+);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_mul_epi64(__mmask8 __M, __m512i __W) {
   __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(1), __M, __W);
   _mm512_mask_reduce_operator(*);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_and_epi64(__mmask8 __M, __m512i __W) {
   __W = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __W);
   _mm512_mask_reduce_operator(&);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_or_epi64(__mmask8 __M, __m512i __W) {
   __W = _mm512_maskz_mov_epi64(__M, __W);
   _mm512_mask_reduce_operator(|);
@@ -9300,21 +9301,21 @@
   __m128d __t8 = __t6 op __t7; \
   return __t8[0];
 
-static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_add_pd(__m512d __W) {
+static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_add_pd(__m512d __W) {
   _mm512_mask_reduce_operator(+);
 }
 
-static __inline__ double __DEFAULT_FN_ATTRS _mm512_reduce_mul_pd(__m512d __W) {
+static __inline__ double __DEFAULT_FN_ATTRS512 _mm512_reduce_mul_pd(__m512d __W) {
   _mm512_mask_reduce_operator(*);
 }
 
-static __inline__ double __DEFAULT_FN_ATTRS
+static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_add_pd(__mmask8 __M, __m512d __W) {
   __W = _mm512_maskz_mov_pd(__M, __W);
   _mm512_mask_reduce_operator(+);
 }
 
-static __inline__ double __DEFAULT_FN_ATTRS
+static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_mul_pd(__mmask8 __M, __m512d __W) {
   __W = _mm512_mask_mov_pd(_mm512_set1_pd(1.0), __M, __W);
   _mm512_mask_reduce_operator(*);
@@ -9334,45 +9335,45 @@
   __v4su __t10 = __t8 op __t9; \
   return __t10[0];
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_reduce_add_epi32(__m512i __W) {
   _mm512_mask_reduce_operator(+);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_reduce_mul_epi32(__m512i __W) {
   _mm512_mask_reduce_operator(*);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_reduce_and_epi32(__m512i __W) {
   _mm512_mask_reduce_operator(&);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_reduce_or_epi32(__m512i __W) {
   _mm512_mask_reduce_operator(|);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_add_epi32( __mmask16 __M, __m512i __W) {
   __W = _mm512_maskz_mov_epi32(__M, __W);
   _mm512_mask_reduce_operator(+);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_mul_epi32( __mmask16 __M, __m512i __W) {
   __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(1), __M, __W);
   _mm512_mask_reduce_operator(*);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_and_epi32( __mmask16 __M, __m512i __W) {
   __W = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __W);
   _mm512_mask_reduce_operator(&);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_or_epi32(__mmask16 __M, __m512i __W) {
   __W = _mm512_maskz_mov_epi32(__M, __W);
   _mm512_mask_reduce_operator(|);
@@ -9392,23 +9393,23 @@
   __m128 __t10 = __t8 op __t9; \
   return __t10[0];
 
-static __inline__ float __DEFAULT_FN_ATTRS
+static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_reduce_add_ps(__m512 __W) {
   _mm512_mask_reduce_operator(+);
 }
 
-static __inline__ float __DEFAULT_FN_ATTRS
+static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_reduce_mul_ps(__m512 __W) {
   _mm512_mask_reduce_operator(*);
 }
 
-static __inline__ float __DEFAULT_FN_ATTRS
+static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_add_ps(__mmask16 __M, __m512 __W) {
   __W = _mm512_maskz_mov_ps(__M, __W);
   _mm512_mask_reduce_operator(+);
 }
 
-static __inline__ float __DEFAULT_FN_ATTRS
+static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_mul_ps(__mmask16 __M, __m512 __W) {
   __W = _mm512_mask_mov_ps(_mm512_set1_ps(1.0f), __M, __W);
   _mm512_mask_reduce_operator(*);
@@ -9424,45 +9425,45 @@
   __v8di __t6 = (__v8di)_mm512_##op(__t4, __t5); \
   return __t6[0];
 
-static __inline__ long long __DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_reduce_max_epi64(__m512i __V) {
   _mm512_mask_reduce_operator(max_epi64);
 }
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
 _mm512_reduce_max_epu64(__m512i __V) {
   _mm512_mask_reduce_operator(max_epu64);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_reduce_min_epi64(__m512i __V) {
   _mm512_mask_reduce_operator(min_epi64);
 }
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
 _mm512_reduce_min_epu64(__m512i __V) {
   _mm512_mask_reduce_operator(min_epu64);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_max_epi64(__mmask8 __M, __m512i __V) {
   __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(-__LONG_LONG_MAX__ - 1LL), __M, __V);
   _mm512_mask_reduce_operator(max_epi64);
 }
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_max_epu64(__mmask8 __M, __m512i __V) {
   __V = _mm512_maskz_mov_epi64(__M, __V);
   _mm512_mask_reduce_operator(max_epu64);
 }
 
-static __inline__ long long __DEFAULT_FN_ATTRS
+static __inline__ long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_min_epi64(__mmask8 __M, __m512i __V) {
   __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(__LONG_LONG_MAX__), __M, __V);
   _mm512_mask_reduce_operator(min_epi64);
 }
 
-static __inline__ unsigned long long __DEFAULT_FN_ATTRS
+static __inline__ unsigned long long __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_min_epu64(__mmask8 __M, __m512i __V) {
   __V = _mm512_mask_mov_epi64(_mm512_set1_epi64(~0ULL), __M, __V);
   _mm512_mask_reduce_operator(min_epu64);
@@ -9482,45 +9483,45 @@
   __v4si __t10 = (__v4si)_mm_##op(__t8, __t9); \
   return __t10[0];
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_reduce_max_epi32(__m512i __V) {
   _mm512_mask_reduce_operator(max_epi32);
 }
 
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS512
 _mm512_reduce_max_epu32(__m512i __V) {
   _mm512_mask_reduce_operator(max_epu32);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_reduce_min_epi32(__m512i __V) {
   _mm512_mask_reduce_operator(min_epi32);
 }
 
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS512
 _mm512_reduce_min_epu32(__m512i __V) {
   _mm512_mask_reduce_operator(min_epu32);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_max_epi32(__mmask16 __M, __m512i __V) {
   __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(-__INT_MAX__ - 1), __M, __V);
   _mm512_mask_reduce_operator(max_epi32);
 }
 
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_max_epu32(__mmask16 __M, __m512i __V) {
   __V = _mm512_maskz_mov_epi32(__M, __V);
   _mm512_mask_reduce_operator(max_epu32);
 }
 
-static __inline__ int __DEFAULT_FN_ATTRS
+static __inline__ int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_min_epi32(__mmask16 __M, __m512i __V) {
   __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(__INT_MAX__), __M, __V);
   _mm512_mask_reduce_operator(min_epi32);
 }
 
-static __inline__ unsigned int __DEFAULT_FN_ATTRS
+static __inline__ unsigned int __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_min_epu32(__mmask16 __M, __m512i __V) {
   __V = _mm512_mask_mov_epi32(_mm512_set1_epi32(~0U), __M, __V);
   _mm512_mask_reduce_operator(min_epu32);
@@ -9538,23 +9539,23 @@
   __m128d __t8 = _mm_##op(__t6, __t7); \
   return __t8[0];
 
-static __inline__ double __DEFAULT_FN_ATTRS
+static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_reduce_max_pd(__m512d __V) {
   _mm512_mask_reduce_operator(max_pd);
 }
 
-static __inline__ double __DEFAULT_FN_ATTRS
+static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_reduce_min_pd(__m512d __V) {
   _mm512_mask_reduce_operator(min_pd);
 }
 
-static __inline__ double __DEFAULT_FN_ATTRS
+static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_max_pd(__mmask8 __M, __m512d __V) {
   __V = _mm512_mask_mov_pd(_mm512_set1_pd(-__builtin_inf()), __M, __V);
   _mm512_mask_reduce_operator(max_pd);
 }
 
-static __inline__ double __DEFAULT_FN_ATTRS
+static __inline__ double __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_min_pd(__mmask8 __M, __m512d __V) {
   __V = _mm512_mask_mov_pd(_mm512_set1_pd(__builtin_inf()), __M, __V);
   _mm512_mask_reduce_operator(min_pd);
@@ -9574,29 +9575,30 @@
   __m128 __t10 = _mm_##op(__t8, __t9); \
   return __t10[0];
 
-static __inline__ float __DEFAULT_FN_ATTRS
+static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_reduce_max_ps(__m512 __V) {
   _mm512_mask_reduce_operator(max_ps);
 }
 
-static __inline__ float __DEFAULT_FN_ATTRS
+static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_reduce_min_ps(__m512 __V) {
   _mm512_mask_reduce_operator(min_ps);
 }
 
-static __inline__ float __DEFAULT_FN_ATTRS
+static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_max_ps(__mmask16 __M, __m512 __V) {
   __V = _mm512_mask_mov_ps(_mm512_set1_ps(-__builtin_inff()), __M, __V);
   _mm512_mask_reduce_operator(max_ps);
 }
 
-static __inline__ float __DEFAULT_FN_ATTRS
+static __inline__ float __DEFAULT_FN_ATTRS512
 _mm512_mask_reduce_min_ps(__mmask16 __M, __m512 __V) {
   __V = _mm512_mask_mov_ps(_mm512_set1_ps(__builtin_inff()), __M, __V);
   _mm512_mask_reduce_operator(min_ps);
 }
 #undef _mm512_mask_reduce_operator
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS512
+#undef __DEFAULT_FN_ATTRS128
 
 #endif /* __AVX512FINTRIN_H */
Index: lib/Headers/avx512ifmaintrin.h
===================================================================
--- lib/Headers/avx512ifmaintrin.h
+++ lib/Headers/avx512ifmaintrin.h
@@ -29,7 +29,7 @@
 #define __IFMAINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma"), __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
 _mm512_madd52hi_epu64 (__m512i __X, __m512i __Y, __m512i __Z)
Index: lib/Headers/avx512ifmavlintrin.h
===================================================================
--- lib/Headers/avx512ifmavlintrin.h
+++ lib/Headers/avx512ifmavlintrin.h
@@ -29,18 +29,19 @@
 #define __IFMAVLINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl")))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512ifma,avx512vl"), __min_vector_width__(256)))
 
 
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_madd52hi_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
 {
   return (__m128i)__builtin_ia32_vpmadd52huq128((__v2di) __X, (__v2di) __Y,
                                                 (__v2di) __Z);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_madd52hi_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128(__M,
@@ -48,7 +49,7 @@
                                       (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_madd52hi_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
 {
   return (__m128i)__builtin_ia32_selectq_128(__M,
@@ -56,14 +57,14 @@
                                       (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_madd52hi_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
 {
   return (__m256i)__builtin_ia32_vpmadd52huq256((__v4di)__X, (__v4di)__Y,
                                                 (__v4di)__Z);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_madd52hi_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256(__M,
@@ -71,7 +72,7 @@
                                    (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_madd52hi_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
 {
   return (__m256i)__builtin_ia32_selectq_256(__M,
@@ -79,14 +80,14 @@
                                    (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_madd52lo_epu64 (__m128i __X, __m128i __Y, __m128i __Z)
 {
   return (__m128i)__builtin_ia32_vpmadd52luq128((__v2di)__X, (__v2di)__Y,
                                                 (__v2di)__Z);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_madd52lo_epu64 (__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128(__M,
@@ -94,7 +95,7 @@
                                       (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_madd52lo_epu64 (__mmask8 __M, __m128i __X, __m128i __Y, __m128i __Z)
 {
   return (__m128i)__builtin_ia32_selectq_128(__M,
@@ -102,14 +103,14 @@
                                       (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_madd52lo_epu64 (__m256i __X, __m256i __Y, __m256i __Z)
 {
   return (__m256i)__builtin_ia32_vpmadd52luq256((__v4di)__X, (__v4di)__Y,
                                                 (__v4di)__Z);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_madd52lo_epu64 (__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256(__M,
@@ -117,7 +118,7 @@
                                    (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_madd52lo_epu64 (__mmask8 __M, __m256i __X, __m256i __Y, __m256i __Z)
 {
   return (__m256i)__builtin_ia32_selectq_256(__M,
@@ -126,6 +127,7 @@
 }
 
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif
Index: lib/Headers/avx512vbmi2intrin.h
===================================================================
--- lib/Headers/avx512vbmi2intrin.h
+++ lib/Headers/avx512vbmi2intrin.h
@@ -29,7 +29,7 @@
 #define __AVX512VBMI2INTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi2"), __min_vector_width__(512)))
 
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
Index: lib/Headers/avx512vbmiintrin.h
===================================================================
--- lib/Headers/avx512vbmiintrin.h
+++ lib/Headers/avx512vbmiintrin.h
@@ -29,7 +29,7 @@
 #define __VBMIINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi"), __min_vector_width__(512)))
 
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
Index: lib/Headers/avx512vbmivlintrin.h
===================================================================
--- lib/Headers/avx512vbmivlintrin.h
+++ lib/Headers/avx512vbmivlintrin.h
@@ -29,10 +29,11 @@
 #define __VBMIVLINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl")))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vbmi,avx512vl"), __min_vector_width__(256)))
 
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_permutex2var_epi8(__m128i __A, __m128i __I, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpermi2varqi128((__v16qi)__A,
@@ -40,7 +41,7 @@
                                                  (__v16qi)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_permutex2var_epi8(__m128i __A, __mmask16 __U, __m128i __I,
                            __m128i __B)
 {
@@ -49,7 +50,7 @@
                                   (__v16qi)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask2_permutex2var_epi8(__m128i __A, __m128i __I, __mmask16 __U,
                             __m128i __B)
 {
@@ -58,7 +59,7 @@
                                   (__v16qi)__I);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_permutex2var_epi8(__mmask16 __U, __m128i __A, __m128i __I,
                             __m128i __B)
 {
@@ -67,14 +68,14 @@
                                   (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_permutex2var_epi8(__m256i __A, __m256i __I, __m256i __B)
 {
   return (__m256i)__builtin_ia32_vpermi2varqi256((__v32qi)__A, (__v32qi)__I,
                                                  (__v32qi)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_permutex2var_epi8(__m256i __A, __mmask32 __U, __m256i __I,
                               __m256i __B)
 {
@@ -83,7 +84,7 @@
                                (__v32qi)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask2_permutex2var_epi8(__m256i __A, __m256i __I, __mmask32 __U,
                                __m256i __B)
 {
@@ -92,7 +93,7 @@
                                (__v32qi)__I);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_permutex2var_epi8(__mmask32 __U, __m256i __A, __m256i __I,
                                __m256i __B)
 {
@@ -101,13 +102,13 @@
                                (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_permutexvar_epi8 (__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_permvarqi128((__v16qi)__B, (__v16qi)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_permutexvar_epi8 (__mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -115,7 +116,7 @@
                                         (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_permutexvar_epi8 (__m128i __W, __mmask16 __M, __m128i __A,
           __m128i __B)
 {
@@ -124,13 +125,13 @@
                                         (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_permutexvar_epi8 (__m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_permvarqi256((__v32qi) __B, (__v32qi) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_permutexvar_epi8 (__mmask32 __M, __m256i __A,
         __m256i __B)
 {
@@ -139,7 +140,7 @@
                                      (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_permutexvar_epi8 (__m256i __W, __mmask32 __M, __m256i __A,
              __m256i __B)
 {
@@ -148,7 +149,7 @@
                                      (__v32qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_multishift_epi64_epi8 (__m128i __W, __mmask16 __M, __m128i __X, __m128i __Y)
 {
   return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
@@ -157,7 +158,7 @@
                 (__mmask16) __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_multishift_epi64_epi8 (__mmask16 __M, __m128i __X, __m128i __Y)
 {
   return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
@@ -167,7 +168,7 @@
                 (__mmask16) __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_multishift_epi64_epi8 (__m128i __X, __m128i __Y)
 {
   return (__m128i) __builtin_ia32_vpmultishiftqb128_mask ((__v16qi) __X,
@@ -177,7 +178,7 @@
                 (__mmask16) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_multishift_epi64_epi8 (__m256i __W, __mmask32 __M, __m256i __X, __m256i __Y)
 {
   return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
@@ -186,7 +187,7 @@
                 (__mmask32) __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_multishift_epi64_epi8 (__mmask32 __M, __m256i __X, __m256i __Y)
 {
   return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
@@ -196,7 +197,7 @@
                 (__mmask32) __M);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_multishift_epi64_epi8 (__m256i __X, __m256i __Y)
 {
   return (__m256i) __builtin_ia32_vpmultishiftqb256_mask ((__v32qi) __X,
@@ -207,6 +208,7 @@
 }
 
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif
Index: lib/Headers/avx512vlbitalgintrin.h
===================================================================
--- lib/Headers/avx512vlbitalgintrin.h
+++ lib/Headers/avx512vlbitalgintrin.h
@@ -29,15 +29,16 @@
 #define __AVX512VLBITALGINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg")))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"), __min_vector_width__(256)))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_popcnt_epi16(__m256i __A)
 {
   return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B)
 {
   return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U,
@@ -45,7 +46,7 @@
               (__v16hi) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B)
 {
   return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(),
@@ -53,13 +54,13 @@
               __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_popcnt_epi16(__m128i __A)
 {
   return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B)
 {
   return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U,
@@ -67,7 +68,7 @@
               (__v8hi) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_popcnt_epi16(__mmask8 __U, __m128i __B)
 {
   return _mm_mask_popcnt_epi16((__m128i) _mm_setzero_si128(),
@@ -75,13 +76,13 @@
               __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_popcnt_epi8(__m256i __A)
 {
   return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B)
 {
   return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U,
@@ -89,7 +90,7 @@
               (__v32qi) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B)
 {
   return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(),
@@ -97,13 +98,13 @@
               __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_popcnt_epi8(__m128i __A)
 {
   return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B)
 {
   return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U,
@@ -111,7 +112,7 @@
               (__v16qi) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_popcnt_epi8(__mmask16 __U, __m128i __B)
 {
   return _mm_mask_popcnt_epi8((__m128i) _mm_setzero_si128(),
@@ -119,7 +120,7 @@
               __B);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
 _mm256_mask_bitshuffle_epi64_mask(__mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A,
@@ -127,7 +128,7 @@
               __U);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
 _mm256_bitshuffle_epi64_mask(__m256i __A, __m256i __B)
 {
   return _mm256_mask_bitshuffle_epi64_mask((__mmask32) -1,
@@ -135,7 +136,7 @@
               __B);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
 _mm_mask_bitshuffle_epi64_mask(__mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A,
@@ -143,7 +144,7 @@
               __U);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
 _mm_bitshuffle_epi64_mask(__m128i __A, __m128i __B)
 {
   return _mm_mask_bitshuffle_epi64_mask((__mmask16) -1,
@@ -152,6 +153,7 @@
 }
 
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif
Index: lib/Headers/avx512vlbwintrin.h
===================================================================
--- lib/Headers/avx512vlbwintrin.h
+++ lib/Headers/avx512vlbwintrin.h
@@ -29,7 +29,8 @@
 #define __AVX512VLBWINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw")))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bw"), __min_vector_width__(256)))
 
 /* Integer compare */
 
@@ -313,147 +314,147 @@
 #define _mm256_mask_cmpneq_epu16_mask(k, A, B) \
     _mm256_mask_cmp_epu16_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_add_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B){
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                              (__v32qi)_mm256_add_epi8(__A, __B),
                                              (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_add_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                              (__v32qi)_mm256_add_epi8(__A, __B),
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_add_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_add_epi16(__A, __B),
                                              (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_add_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_add_epi16(__A, __B),
                                              (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sub_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                              (__v32qi)_mm256_sub_epi8(__A, __B),
                                              (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sub_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                              (__v32qi)_mm256_sub_epi8(__A, __B),
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sub_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_sub_epi16(__A, __B),
                                              (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sub_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_sub_epi16(__A, __B),
                                              (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_add_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                              (__v16qi)_mm_add_epi8(__A, __B),
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_add_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                              (__v16qi)_mm_add_epi8(__A, __B),
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_add_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_add_epi16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_add_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_add_epi16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sub_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                              (__v16qi)_mm_sub_epi8(__A, __B),
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sub_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                              (__v16qi)_mm_sub_epi8(__A, __B),
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sub_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_sub_epi16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sub_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_sub_epi16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_mullo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_mullo_epi16(__A, __B),
                                              (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_mullo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                              (__v16hi)_mm256_mullo_epi16(__A, __B),
                                              (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_mullo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mullo_epi16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_mullo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mullo_epi16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_blend_epi8 (__mmask16 __U, __m128i __A, __m128i __W)
 {
   return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
@@ -461,7 +462,7 @@
               (__v16qi) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_blend_epi8 (__mmask32 __U, __m256i __A, __m256i __W)
 {
   return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
@@ -469,7 +470,7 @@
                (__v32qi) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_blend_epi16 (__mmask8 __U, __m128i __A, __m128i __W)
 {
   return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
@@ -477,7 +478,7 @@
                (__v8hi) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_blend_epi16 (__mmask16 __U, __m256i __A, __m256i __W)
 {
   return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
@@ -485,7 +486,7 @@
                (__v16hi) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_abs_epi8(__m128i __W, __mmask16 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -493,7 +494,7 @@
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_abs_epi8(__mmask16 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -501,7 +502,7 @@
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_abs_epi8(__m256i __W, __mmask32 __U, __m256i __A)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -509,7 +510,7 @@
                                              (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_abs_epi8 (__mmask32 __U, __m256i __A)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -517,7 +518,7 @@
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_abs_epi16(__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -525,7 +526,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_abs_epi16(__mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -533,7 +534,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_abs_epi16(__m256i __W, __mmask16 __U, __m256i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -541,7 +542,7 @@
                                              (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_abs_epi16(__mmask16 __U, __m256i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -549,14 +550,14 @@
                                              (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_packs_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
                                              (__v8hi)_mm_packs_epi32(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_packs_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -564,7 +565,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_packs_epi32(__mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -572,7 +573,7 @@
                                           (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_packs_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -580,7 +581,7 @@
                                           (__v16hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_packs_epi16(__mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -588,7 +589,7 @@
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_packs_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -596,7 +597,7 @@
                                              (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_packs_epi16(__mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -604,7 +605,7 @@
                                           (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_packs_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -612,7 +613,7 @@
                                           (__v32qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_packus_epi32(__mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -620,7 +621,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_packus_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -628,7 +629,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_packus_epi32(__mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -636,7 +637,7 @@
                                          (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_packus_epi32(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -644,7 +645,7 @@
                                          (__v16hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_packus_epi16(__mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -652,7 +653,7 @@
                                             (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_packus_epi16(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -660,7 +661,7 @@
                                             (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_packus_epi16(__mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -668,7 +669,7 @@
                                          (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_packus_epi16(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -676,7 +677,7 @@
                                          (__v32qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_adds_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -684,7 +685,7 @@
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_adds_epi8(__mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -692,7 +693,7 @@
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_adds_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -700,7 +701,7 @@
                                             (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_adds_epi8(__mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -708,7 +709,7 @@
                                             (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_adds_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -716,7 +717,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_adds_epi16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -724,7 +725,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_adds_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -732,7 +733,7 @@
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_adds_epi16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -740,7 +741,7 @@
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_adds_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -748,7 +749,7 @@
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_adds_epu8(__mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -756,7 +757,7 @@
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_adds_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -764,7 +765,7 @@
                                             (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_adds_epu8(__mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -772,7 +773,7 @@
                                             (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_adds_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -780,7 +781,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_adds_epu16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -788,7 +789,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_adds_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -796,7 +797,7 @@
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_adds_epu16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -804,7 +805,7 @@
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_avg_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -812,7 +813,7 @@
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_avg_epu8(__mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -820,7 +821,7 @@
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_avg_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -828,7 +829,7 @@
                                              (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_avg_epu8(__mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -836,7 +837,7 @@
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_avg_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -844,7 +845,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_avg_epu16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -852,7 +853,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_avg_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -860,7 +861,7 @@
                                             (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_avg_epu16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -868,7 +869,7 @@
                                             (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_max_epi8(__mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -876,7 +877,7 @@
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_max_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -884,7 +885,7 @@
                                              (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_max_epi8(__mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -892,7 +893,7 @@
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_max_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -900,7 +901,7 @@
                                              (__v32qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_max_epi16(__mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -908,7 +909,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_max_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -916,7 +917,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_max_epi16(__mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -924,7 +925,7 @@
                                             (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_max_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -932,7 +933,7 @@
                                             (__v16hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_max_epu8(__mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -940,7 +941,7 @@
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_max_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -948,7 +949,7 @@
                                              (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_max_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -956,7 +957,7 @@
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_max_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -964,7 +965,7 @@
                                              (__v32qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_max_epu16(__mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -972,7 +973,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_max_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -980,7 +981,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_max_epu16(__mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -988,7 +989,7 @@
                                             (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_max_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -996,7 +997,7 @@
                                             (__v16hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_min_epi8(__mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -1004,7 +1005,7 @@
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_min_epi8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -1012,7 +1013,7 @@
                                              (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_min_epi8(__mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -1020,7 +1021,7 @@
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_min_epi8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -1028,7 +1029,7 @@
                                              (__v32qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_min_epi16(__mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -1036,7 +1037,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_min_epi16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -1044,7 +1045,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_min_epi16(__mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -1052,7 +1053,7 @@
                                             (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_min_epi16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -1060,7 +1061,7 @@
                                             (__v16hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_min_epu8(__mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -1068,7 +1069,7 @@
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_min_epu8(__m128i __W, __mmask16 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
@@ -1076,7 +1077,7 @@
                                              (__v16qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_min_epu8 (__mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -1084,7 +1085,7 @@
                                              (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_min_epu8(__m256i __W, __mmask32 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__M,
@@ -1092,7 +1093,7 @@
                                              (__v32qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_min_epu16(__mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -1100,7 +1101,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_min_epu16(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -1108,7 +1109,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_min_epu16(__mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -1116,7 +1117,7 @@
                                             (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_min_epu16(__m256i __W, __mmask16 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__M,
@@ -1124,7 +1125,7 @@
                                             (__v16hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_shuffle_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1132,7 +1133,7 @@
                                             (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_shuffle_epi8(__mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1140,7 +1141,7 @@
                                             (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_shuffle_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1148,7 +1149,7 @@
                                          (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_shuffle_epi8(__mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1156,7 +1157,7 @@
                                          (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_subs_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1164,7 +1165,7 @@
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_subs_epi8(__mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1172,7 +1173,7 @@
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_subs_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1180,7 +1181,7 @@
                                             (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_subs_epi8(__mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1188,7 +1189,7 @@
                                             (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_subs_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1196,7 +1197,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_subs_epi16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1204,7 +1205,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_subs_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1212,7 +1213,7 @@
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_subs_epi16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1220,7 +1221,7 @@
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_subs_epu8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1228,7 +1229,7 @@
                                              (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_subs_epu8(__mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
@@ -1236,7 +1237,7 @@
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_subs_epu8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1244,7 +1245,7 @@
                                             (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_subs_epu8(__mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
@@ -1252,7 +1253,7 @@
                                             (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_subs_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1260,7 +1261,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_subs_epu16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1268,7 +1269,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_subs_epu16(__m256i __W, __mmask16 __U, __m256i __A,
       __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1276,7 +1277,7 @@
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_subs_epu16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1284,14 +1285,14 @@
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_permutex2var_epi16(__m128i __A, __m128i __I, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpermi2varhi128((__v8hi)__A, (__v8hi)__I,
                                                  (__v8hi) __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_permutex2var_epi16(__m128i __A, __mmask8 __U, __m128i __I,
                             __m128i __B)
 {
@@ -1300,7 +1301,7 @@
                                   (__v8hi)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask2_permutex2var_epi16(__m128i __A, __m128i __I, __mmask8 __U,
                              __m128i __B)
 {
@@ -1309,7 +1310,7 @@
                                   (__v8hi)__I);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_permutex2var_epi16 (__mmask8 __U, __m128i __A, __m128i __I,
             __m128i __B)
 {
@@ -1318,14 +1319,14 @@
                                   (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_permutex2var_epi16(__m256i __A, __m256i __I, __m256i __B)
 {
   return (__m256i)__builtin_ia32_vpermi2varhi256((__v16hi)__A, (__v16hi)__I,
                                                  (__v16hi)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_permutex2var_epi16(__m256i __A, __mmask16 __U, __m256i __I,
                                __m256i __B)
 {
@@ -1334,7 +1335,7 @@
                               (__v16hi)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask2_permutex2var_epi16(__m256i __A, __m256i __I, __mmask16 __U,
                                 __m256i __B)
 {
@@ -1343,7 +1344,7 @@
                               (__v16hi)__I);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_permutex2var_epi16 (__mmask16 __U, __m256i __A, __m256i __I,
                                  __m256i __B)
 {
@@ -1352,21 +1353,21 @@
                               (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_maddubs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                             (__v8hi)_mm_maddubs_epi16(__X, __Y),
                                             (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_maddubs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                             (__v8hi)_mm_maddubs_epi16(__X, __Y),
                                             (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_maddubs_epi16(__m256i __W, __mmask16 __U, __m256i __X,
                           __m256i __Y) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1374,126 +1375,126 @@
                                         (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_maddubs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                         (__v16hi)_mm256_maddubs_epi16(__X, __Y),
                                         (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_madd_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_madd_epi16(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_madd_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_madd_epi16(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_madd_epi16(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                             (__v8si)_mm256_madd_epi16(__A, __B),
                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_madd_epi16(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                             (__v8si)_mm256_madd_epi16(__A, __B),
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtsepi16_epi8 (__m128i __A) {
   return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
                (__v16qi) _mm_setzero_si128(),
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
   return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
                (__v16qi) __O,
                 __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtsepi16_epi8 (__mmask8 __M, __m128i __A) {
   return (__m128i) __builtin_ia32_pmovswb128_mask ((__v8hi) __A,
                (__v16qi) _mm_setzero_si128(),
                __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtsepi16_epi8 (__m256i __A) {
   return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
                (__v16qi) _mm_setzero_si128(),
                (__mmask16) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtsepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
   return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
                (__v16qi) __O,
                 __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtsepi16_epi8 (__mmask16 __M, __m256i __A) {
   return (__m128i) __builtin_ia32_pmovswb256_mask ((__v16hi) __A,
                (__v16qi) _mm_setzero_si128(),
                __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtusepi16_epi8 (__m128i __A) {
   return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
                 (__v16qi) _mm_setzero_si128(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtusepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
   return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
                 (__v16qi) __O,
                 __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtusepi16_epi8 (__mmask8 __M, __m128i __A) {
   return (__m128i) __builtin_ia32_pmovuswb128_mask ((__v8hi) __A,
                 (__v16qi) _mm_setzero_si128(),
                 __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtusepi16_epi8 (__m256i __A) {
   return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
                 (__v16qi) _mm_setzero_si128(),
                 (__mmask16) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtusepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
   return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
                 (__v16qi) __O,
                 __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtusepi16_epi8 (__mmask16 __M, __m256i __A) {
   return (__m128i) __builtin_ia32_pmovuswb256_mask ((__v16hi) __A,
                 (__v16qi) _mm_setzero_si128(),
                 __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtepi16_epi8 (__m128i __A) {
 
   return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
@@ -1501,273 +1502,273 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi16_epi8 (__m128i __O, __mmask8 __M, __m128i __A) {
   return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
                (__v16qi) __O,
                __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepi16_epi8 (__mmask8 __M, __m128i __A) {
   return (__m128i) __builtin_ia32_pmovwb128_mask ((__v8hi) __A,
                (__v16qi) _mm_setzero_si128(),
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovwb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
 }
 
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovuswb128mem_mask ((__v16qi *) __P, (__v8hi) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi16_epi8 (__m256i __A) {
   return (__m128i)__builtin_convertvector((__v16hi) __A, __v16qi);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi16_epi8 (__m128i __O, __mmask16 __M, __m256i __A) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                              (__v16qi)_mm256_cvtepi16_epi8(__A),
                                              (__v16qi)__O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepi16_epi8 (__mmask16 __M, __m256i __A) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__M,
                                              (__v16qi)_mm256_cvtepi16_epi8(__A),
                                              (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
 {
   __builtin_ia32_pmovwb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtsepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
 {
   __builtin_ia32_pmovswb256mem_mask ((__v16qi *) __P, (__v16hi) __A, __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtusepi16_storeu_epi8 (void * __P, __mmask16 __M, __m256i __A)
 {
   __builtin_ia32_pmovuswb256mem_mask ((__v16qi*) __P, (__v16hi) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_mulhrs_epi16(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mulhrs_epi16(__X, __Y),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_mulhrs_epi16(__mmask8 __U, __m128i __X, __m128i __Y) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mulhrs_epi16(__X, __Y),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_mulhrs_epi16(__m256i __W, __mmask16 __U, __m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
                                          (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_mulhrs_epi16(__mmask16 __U, __m256i __X, __m256i __Y) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                          (__v16hi)_mm256_mulhrs_epi16(__X, __Y),
                                          (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_mulhi_epu16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mulhi_epu16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_mulhi_epu16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mulhi_epu16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_mulhi_epu16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                           (__v16hi)_mm256_mulhi_epu16(__A, __B),
                                           (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_mulhi_epu16(__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                           (__v16hi)_mm256_mulhi_epu16(__A, __B),
                                           (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_mulhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mulhi_epi16(__A, __B),
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_mulhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                              (__v8hi)_mm_mulhi_epi16(__A, __B),
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_mulhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                           (__v16hi)_mm256_mulhi_epi16(__A, __B),
                                           (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_mulhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                           (__v16hi)_mm256_mulhi_epi16(__A, __B),
                                           (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_unpackhi_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                            (__v16qi)_mm_unpackhi_epi8(__A, __B),
                                            (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_unpackhi_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                            (__v16qi)_mm_unpackhi_epi8(__A, __B),
                                            (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_unpackhi_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                         (__v32qi)_mm256_unpackhi_epi8(__A, __B),
                                         (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_unpackhi_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                         (__v32qi)_mm256_unpackhi_epi8(__A, __B),
                                         (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_unpackhi_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                            (__v8hi)_mm_unpackhi_epi16(__A, __B),
                                            (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_unpackhi_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                            (__v8hi)_mm_unpackhi_epi16(__A, __B),
                                            (__v8hi) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_unpackhi_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                        (__v16hi)_mm256_unpackhi_epi16(__A, __B),
                                        (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_unpackhi_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                        (__v16hi)_mm256_unpackhi_epi16(__A, __B),
                                        (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_unpacklo_epi8(__m128i __W, __mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                            (__v16qi)_mm_unpacklo_epi8(__A, __B),
                                            (__v16qi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_unpacklo_epi8(__mmask16 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectb_128((__mmask16)__U,
                                            (__v16qi)_mm_unpacklo_epi8(__A, __B),
                                            (__v16qi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_unpacklo_epi8(__m256i __W, __mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                         (__v32qi)_mm256_unpacklo_epi8(__A, __B),
                                         (__v32qi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_unpacklo_epi8(__mmask32 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectb_256((__mmask32)__U,
                                         (__v32qi)_mm256_unpacklo_epi8(__A, __B),
                                         (__v32qi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_unpacklo_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                            (__v8hi)_mm_unpacklo_epi16(__A, __B),
                                            (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_unpacklo_epi16(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
                                            (__v8hi)_mm_unpacklo_epi16(__A, __B),
                                            (__v8hi) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_unpacklo_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                        (__v16hi)_mm256_unpacklo_epi16(__A, __B),
                                        (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_unpacklo_epi16(__mmask16 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
                                        (__v16hi)_mm256_unpacklo_epi16(__A, __B),
                                        (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1775,7 +1776,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepi8_epi16(__mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1783,7 +1784,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1791,7 +1792,7 @@
                                              (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepi8_epi16(__mmask16 __U, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1800,7 +1801,7 @@
 }
 
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepu8_epi16(__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1808,7 +1809,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepu8_epi16(__mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1816,7 +1817,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepu8_epi16(__m256i __W, __mmask16 __U, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1824,7 +1825,7 @@
                                              (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepu8_epi16 (__mmask16 __U, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1875,13 +1876,13 @@
                                                                       (imm)), \
                                       (__v16hi)_mm256_setzero_si256())
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sllv_epi16(__m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_psllv16hi((__v16hi)__A, (__v16hi)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sllv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1889,7 +1890,7 @@
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sllv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1897,13 +1898,13 @@
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_sllv_epi16(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_psllv8hi((__v8hi)__A, (__v8hi)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sllv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1911,7 +1912,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sllv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1919,7 +1920,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sll_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1927,7 +1928,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sll_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1935,7 +1936,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sll_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1943,7 +1944,7 @@
                                           (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sll_epi16(__mmask16 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1951,7 +1952,7 @@
                                           (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_slli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1959,7 +1960,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_slli_epi16 (__mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -1967,7 +1968,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_slli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1975,7 +1976,7 @@
                                          (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_slli_epi16(__mmask16 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1983,13 +1984,13 @@
                                          (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srlv_epi16(__m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_psrlv16hi((__v16hi)__A, (__v16hi)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srlv_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -1997,7 +1998,7 @@
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srlv_epi16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -2005,13 +2006,13 @@
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_srlv_epi16(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_psrlv8hi((__v8hi)__A, (__v8hi)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srlv_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2019,7 +2020,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srlv_epi16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2027,13 +2028,13 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srav_epi16(__m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_psrav16hi((__v16hi)__A, (__v16hi)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srav_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -2041,7 +2042,7 @@
                                            (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srav_epi16(__mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -2049,13 +2050,13 @@
                                            (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_srav_epi16(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_psrav8hi((__v8hi)__A, (__v8hi)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srav_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2063,7 +2064,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srav_epi16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2071,7 +2072,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sra_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2079,7 +2080,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sra_epi16(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2087,7 +2088,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sra_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -2095,7 +2096,7 @@
                                           (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sra_epi16(__mmask16 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -2103,7 +2104,7 @@
                                           (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srai_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2111,7 +2112,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srai_epi16(__mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2119,7 +2120,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srai_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -2127,7 +2128,7 @@
                                          (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srai_epi16(__mmask16 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -2135,7 +2136,7 @@
                                          (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srl_epi16(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2143,7 +2144,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srl_epi16 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2151,7 +2152,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srl_epi16(__m256i __W, __mmask16 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -2159,7 +2160,7 @@
                                           (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srl_epi16(__mmask16 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -2167,7 +2168,7 @@
                                           (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srli_epi16(__m128i __W, __mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2175,7 +2176,7 @@
                                              (__v8hi)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srli_epi16 (__mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__U,
@@ -2183,7 +2184,7 @@
                                              (__v8hi)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srli_epi16(__m256i __W, __mmask16 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -2191,7 +2192,7 @@
                                          (__v16hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srli_epi16(__mmask16 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectw_256((__mmask16)__U,
@@ -2199,7 +2200,7 @@
                                          (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_mov_epi16 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
@@ -2207,7 +2208,7 @@
                 (__v8hi) __W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_mov_epi16 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectw_128 ((__mmask8) __U,
@@ -2215,7 +2216,7 @@
                 (__v8hi) _mm_setzero_si128 ());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_mov_epi16 (__m256i __W, __mmask16 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
@@ -2223,7 +2224,7 @@
                 (__v16hi) __W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_mov_epi16 (__mmask16 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectw_256 ((__mmask16) __U,
@@ -2231,7 +2232,7 @@
                 (__v16hi) _mm256_setzero_si256 ());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_mov_epi8 (__m128i __W, __mmask16 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
@@ -2239,7 +2240,7 @@
                 (__v16qi) __W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_mov_epi8 (__mmask16 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectb_128 ((__mmask16) __U,
@@ -2247,7 +2248,7 @@
                 (__v16qi) _mm_setzero_si128 ());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_mov_epi8 (__m256i __W, __mmask32 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
@@ -2255,7 +2256,7 @@
                 (__v32qi) __W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_mov_epi8 (__mmask32 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectb_256 ((__mmask32) __U,
@@ -2264,7 +2265,7 @@
 }
 
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_set1_epi8 (__m128i __O, __mmask16 __M, char __A)
 {
   return (__m128i) __builtin_ia32_selectb_128(__M,
@@ -2272,7 +2273,7 @@
                                               (__v16qi) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_set1_epi8 (__mmask16 __M, char __A)
 {
  return (__m128i) __builtin_ia32_selectb_128(__M,
@@ -2280,7 +2281,7 @@
                                              (__v16qi) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_set1_epi8 (__m256i __O, __mmask32 __M, char __A)
 {
   return (__m256i) __builtin_ia32_selectb_256(__M,
@@ -2288,7 +2289,7 @@
                                               (__v32qi) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_set1_epi8 (__mmask32 __M, char __A)
 {
   return (__m256i) __builtin_ia32_selectb_256(__M,
@@ -2296,7 +2297,7 @@
                                               (__v32qi) _mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_loadu_epi16 (__m128i __W, __mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
@@ -2304,7 +2305,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_loadu_epi16 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddquhi128_mask ((__v8hi *) __P,
@@ -2313,7 +2314,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_loadu_epi16 (__m256i __W, __mmask16 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
@@ -2321,7 +2322,7 @@
                  (__mmask16) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_loadu_epi16 (__mmask16 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_loaddquhi256_mask ((__v16hi *) __P,
@@ -2330,7 +2331,7 @@
                  (__mmask16) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_loadu_epi8 (__m128i __W, __mmask16 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
@@ -2338,7 +2339,7 @@
                  (__mmask16) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_loadu_epi8 (__mmask16 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddquqi128_mask ((__v16qi *) __P,
@@ -2347,7 +2348,7 @@
                  (__mmask16) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_loadu_epi8 (__m256i __W, __mmask32 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
@@ -2355,7 +2356,7 @@
                  (__mmask32) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_loadu_epi8 (__mmask32 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_loaddquqi256_mask ((__v32qi *) __P,
@@ -2363,7 +2364,7 @@
                  _mm256_setzero_si256 (),
                  (__mmask32) __U);
 }
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm_mask_storeu_epi16 (void *__P, __mmask8 __U, __m128i __A)
 {
   __builtin_ia32_storedquhi128_mask ((__v8hi *) __P,
@@ -2371,7 +2372,7 @@
              (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_storeu_epi16 (void *__P, __mmask16 __U, __m256i __A)
 {
   __builtin_ia32_storedquhi256_mask ((__v16hi *) __P,
@@ -2379,7 +2380,7 @@
              (__mmask16) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_storeu_epi8 (void *__P, __mmask16 __U, __m128i __A)
 {
   __builtin_ia32_storedquqi128_mask ((__v16qi *) __P,
@@ -2387,7 +2388,7 @@
              (__mmask16) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_storeu_epi8 (void *__P, __mmask32 __U, __m256i __A)
 {
   __builtin_ia32_storedquqi256_mask ((__v32qi *) __P,
@@ -2395,162 +2396,162 @@
              (__mmask32) __U);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
 _mm_test_epi8_mask (__m128i __A, __m128i __B)
 {
   return _mm_cmpneq_epi8_mask (_mm_and_si128(__A, __B), _mm_setzero_si128());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
 _mm_mask_test_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
 {
   return _mm_mask_cmpneq_epi8_mask (__U, _mm_and_si128 (__A, __B),
                                     _mm_setzero_si128());
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
 _mm256_test_epi8_mask (__m256i __A, __m256i __B)
 {
   return _mm256_cmpneq_epi8_mask (_mm256_and_si256(__A, __B),
                                   _mm256_setzero_si256());
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
 _mm256_mask_test_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
 {
   return _mm256_mask_cmpneq_epi8_mask (__U, _mm256_and_si256(__A, __B),
                                        _mm256_setzero_si256());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_test_epi16_mask (__m128i __A, __m128i __B)
 {
   return _mm_cmpneq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_mask_test_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return _mm_mask_cmpneq_epi16_mask (__U, _mm_and_si128 (__A, __B),
                                      _mm_setzero_si128());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
 _mm256_test_epi16_mask (__m256i __A, __m256i __B)
 {
   return _mm256_cmpneq_epi16_mask (_mm256_and_si256 (__A, __B),
                                    _mm256_setzero_si256 ());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
 _mm256_mask_test_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
 {
   return _mm256_mask_cmpneq_epi16_mask (__U, _mm256_and_si256(__A, __B),
                                         _mm256_setzero_si256());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
 _mm_testn_epi8_mask (__m128i __A, __m128i __B)
 {
   return _mm_cmpeq_epi8_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
 _mm_mask_testn_epi8_mask (__mmask16 __U, __m128i __A, __m128i __B)
 {
   return _mm_mask_cmpeq_epi8_mask (__U, _mm_and_si128 (__A, __B),
                                   _mm_setzero_si128());
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
 _mm256_testn_epi8_mask (__m256i __A, __m256i __B)
 {
   return _mm256_cmpeq_epi8_mask (_mm256_and_si256 (__A, __B),
                                  _mm256_setzero_si256());
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
 _mm256_mask_testn_epi8_mask (__mmask32 __U, __m256i __A, __m256i __B)
 {
   return _mm256_mask_cmpeq_epi8_mask (__U, _mm256_and_si256 (__A, __B),
                                       _mm256_setzero_si256());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_testn_epi16_mask (__m128i __A, __m128i __B)
 {
   return _mm_cmpeq_epi16_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_mask_testn_epi16_mask (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return _mm_mask_cmpeq_epi16_mask (__U, _mm_and_si128(__A, __B), _mm_setzero_si128());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
 _mm256_testn_epi16_mask (__m256i __A, __m256i __B)
 {
   return _mm256_cmpeq_epi16_mask (_mm256_and_si256(__A, __B),
                                   _mm256_setzero_si256());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
 _mm256_mask_testn_epi16_mask (__mmask16 __U, __m256i __A, __m256i __B)
 {
   return _mm256_mask_cmpeq_epi16_mask (__U, _mm256_and_si256 (__A, __B),
                                        _mm256_setzero_si256());
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS128
 _mm_movepi8_mask (__m128i __A)
 {
   return (__mmask16) __builtin_ia32_cvtb2mask128 ((__v16qi) __A);
 }
 
-static __inline__ __mmask32 __DEFAULT_FN_ATTRS
+static __inline__ __mmask32 __DEFAULT_FN_ATTRS256
 _mm256_movepi8_mask (__m256i __A)
 {
   return (__mmask32) __builtin_ia32_cvtb2mask256 ((__v32qi) __A);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_movepi16_mask (__m128i __A)
 {
   return (__mmask8) __builtin_ia32_cvtw2mask128 ((__v8hi) __A);
 }
 
-static __inline__ __mmask16 __DEFAULT_FN_ATTRS
+static __inline__ __mmask16 __DEFAULT_FN_ATTRS256
 _mm256_movepi16_mask (__m256i __A)
 {
   return (__mmask16) __builtin_ia32_cvtw2mask256 ((__v16hi) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_movm_epi8 (__mmask16 __A)
 {
   return (__m128i) __builtin_ia32_cvtmask2b128 (__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_movm_epi8 (__mmask32 __A)
 {
   return (__m256i) __builtin_ia32_cvtmask2b256 (__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_movm_epi16 (__mmask8 __A)
 {
   return (__m128i) __builtin_ia32_cvtmask2w128 (__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_movm_epi16 (__mmask16 __A)
 {
   return (__m256i) __builtin_ia32_cvtmask2w256 (__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_broadcastb_epi8 (__m128i __O, __mmask16 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectb_128(__M,
@@ -2558,7 +2559,7 @@
                                              (__v16qi) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_broadcastb_epi8 (__mmask16 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectb_128(__M,
@@ -2566,7 +2567,7 @@
                                              (__v16qi) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_broadcastb_epi8 (__m256i __O, __mmask32 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectb_256(__M,
@@ -2574,7 +2575,7 @@
                                              (__v32qi) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_broadcastb_epi8 (__mmask32 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectb_256(__M,
@@ -2582,7 +2583,7 @@
                                              (__v32qi) _mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_broadcastw_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128(__M,
@@ -2590,7 +2591,7 @@
                                              (__v8hi) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_broadcastw_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectw_128(__M,
@@ -2598,7 +2599,7 @@
                                              (__v8hi) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_broadcastw_epi16 (__m256i __O, __mmask16 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256(__M,
@@ -2606,7 +2607,7 @@
                                              (__v16hi) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_broadcastw_epi16 (__mmask16 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectw_256(__M,
@@ -2614,7 +2615,7 @@
                                              (__v16hi) _mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_set1_epi16 (__m256i __O, __mmask16 __M, short __A)
 {
   return (__m256i) __builtin_ia32_selectw_256 (__M,
@@ -2622,7 +2623,7 @@
                                                (__v16hi) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_set1_epi16 (__mmask16 __M, short __A)
 {
   return (__m256i) __builtin_ia32_selectw_256(__M,
@@ -2630,7 +2631,7 @@
                                               (__v16hi) _mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_set1_epi16 (__m128i __O, __mmask8 __M, short __A)
 {
   return (__m128i) __builtin_ia32_selectw_128(__M,
@@ -2638,7 +2639,7 @@
                                               (__v8hi) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_set1_epi16 (__mmask8 __M, short __A)
 {
   return (__m128i) __builtin_ia32_selectw_128(__M,
@@ -2646,13 +2647,13 @@
                                               (__v8hi) _mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_permutexvar_epi16 (__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_permvarhi128((__v8hi) __B, (__v8hi) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_permutexvar_epi16 (__mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectw_128((__mmask8)__M,
@@ -2660,7 +2661,7 @@
                                         (__v8hi) _mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_permutexvar_epi16 (__m128i __W, __mmask8 __M, __m128i __A,
           __m128i __B)
 {
@@ -2669,13 +2670,13 @@
                                         (__v8hi)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_permutexvar_epi16 (__m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_permvarhi256((__v16hi) __B, (__v16hi) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_permutexvar_epi16 (__mmask16 __M, __m256i __A,
         __m256i __B)
 {
@@ -2684,7 +2685,7 @@
                                     (__v16hi)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_permutexvar_epi16 (__m256i __W, __mmask16 __M, __m256i __A,
              __m256i __B)
 {
@@ -2741,6 +2742,7 @@
                                   (__v16hi)_mm256_dbsad_epu8((A), (B), (imm)), \
                                   (__v16hi)_mm256_setzero_si256())
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif /* __AVX512VLBWINTRIN_H */
Index: lib/Headers/avx512vlcdintrin.h
===================================================================
--- lib/Headers/avx512vlcdintrin.h
+++ lib/Headers/avx512vlcdintrin.h
@@ -28,35 +28,36 @@
 #define __AVX512VLCDINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd")))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512cd"), __min_vector_width__(256)))
 
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_broadcastmb_epi64 (__mmask8 __A)
 { 
   return (__m128i) _mm_set1_epi64x((long long) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_broadcastmb_epi64 (__mmask8 __A)
 {
   return (__m256i) _mm256_set1_epi64x((long long)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_broadcastmw_epi32 (__mmask16 __A)
 {
   return (__m128i) _mm_set1_epi32((int)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_broadcastmw_epi32 (__mmask16 __A)
 {
   return (__m256i) _mm256_set1_epi32((int)__A);
 }
 
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_conflict_epi64 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
@@ -64,7 +65,7 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_conflict_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
@@ -72,7 +73,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_conflict_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vpconflictdi_128_mask ((__v2di) __A,
@@ -81,7 +82,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_conflict_epi64 (__m256i __A)
 {
   return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
@@ -89,7 +90,7 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_conflict_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
@@ -97,7 +98,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_conflict_epi64 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vpconflictdi_256_mask ((__v4di) __A,
@@ -105,7 +106,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_conflict_epi32 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
@@ -113,7 +114,7 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_conflict_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
@@ -121,7 +122,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_conflict_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_vpconflictsi_128_mask ((__v4si) __A,
@@ -129,7 +130,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_conflict_epi32 (__m256i __A)
 {
   return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
@@ -137,7 +138,7 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_conflict_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
@@ -145,7 +146,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_conflict_epi32 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_vpconflictsi_256_mask ((__v8si) __A,
@@ -154,13 +155,13 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_lzcnt_epi32 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_vplzcntd_128 ((__v4si) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_lzcnt_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -168,7 +169,7 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_lzcnt_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -176,13 +177,13 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_lzcnt_epi32 (__m256i __A)
 {
   return (__m256i) __builtin_ia32_vplzcntd_256 ((__v8si) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_lzcnt_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -190,7 +191,7 @@
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_lzcnt_epi32 (__mmask8 __U, __m256i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -198,13 +199,13 @@
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_lzcnt_epi64 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_vplzcntq_128 ((__v2di) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_lzcnt_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -212,7 +213,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_lzcnt_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -220,13 +221,13 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_lzcnt_epi64 (__m256i __A)
 {
   return (__m256i) __builtin_ia32_vplzcntq_256 ((__v4di) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_lzcnt_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -234,7 +235,7 @@
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_lzcnt_epi64 (__mmask8 __U, __m256i __A)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -242,6 +243,7 @@
                                              (__v4di)_mm256_setzero_si256());
 }
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif /* __AVX512VLCDINTRIN_H */
Index: lib/Headers/avx512vldqintrin.h
===================================================================
--- lib/Headers/avx512vldqintrin.h
+++ lib/Headers/avx512vldqintrin.h
@@ -29,760 +29,761 @@
 #define __AVX512VLDQINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq")))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512dq"), __min_vector_width__(256)))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mullo_epi64 (__m256i __A, __m256i __B) {
   return (__m256i) ((__v4du) __A * (__v4du) __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_mullo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_mullo_epi64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_mullo_epi64(__mmask8 __U, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_mullo_epi64(__A, __B),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mullo_epi64 (__m128i __A, __m128i __B) {
   return (__m128i) ((__v2du) __A * (__v2du) __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_mullo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_mullo_epi64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_mullo_epi64(__mmask8 __U, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_mullo_epi64(__A, __B),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_andnot_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_andnot_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_andnot_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_andnot_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_andnot_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_andnot_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_andnot_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_andnot_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_andnot_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_andnot_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_andnot_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_andnot_ps(__A, __B),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_andnot_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_andnot_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_andnot_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_andnot_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_and_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_and_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_and_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_and_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_and_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_and_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_and_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_and_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_and_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_and_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_and_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_and_ps(__A, __B),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_and_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_and_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_and_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_and_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_xor_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_xor_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_xor_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_xor_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_xor_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_xor_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_xor_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_xor_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_xor_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_xor_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_xor_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_xor_ps(__A, __B),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_xor_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_xor_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_xor_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_xor_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_or_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_or_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_or_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_or_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_or_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_or_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_or_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_or_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_or_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_or_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_or_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_or_ps(__A, __B),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_or_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_or_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_or_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_or_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtpd_epi64 (__m128d __A) {
   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
                 (__v2di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtpd_epi64 (__mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvtpd2qq128_mask ((__v2df) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtpd_epi64 (__m256d __A) {
   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
                 (__v4di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtpd_epi64 (__mmask8 __U, __m256d __A) {
   return (__m256i) __builtin_ia32_cvtpd2qq256_mask ((__v4df) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtpd_epu64 (__m128d __A) {
   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
                 (__v2di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtpd_epu64 (__mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvtpd2uqq128_mask ((__v2df) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtpd_epu64 (__m256d __A) {
   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
                 (__v4di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtpd_epu64 (__mmask8 __U, __m256d __A) {
   return (__m256i) __builtin_ia32_cvtpd2uqq256_mask ((__v4df) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtps_epi64 (__m128 __A) {
   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
                 (__v2di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
   return (__m128i) __builtin_ia32_cvtps2qq128_mask ((__v4sf) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtps_epi64 (__m128 __A) {
   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
                 (__v4di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtps_epi64 (__mmask8 __U, __m128 __A) {
   return (__m256i) __builtin_ia32_cvtps2qq256_mask ((__v4sf) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtps_epu64 (__m128 __A) {
   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
                 (__v2di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
   return (__m128i) __builtin_ia32_cvtps2uqq128_mask ((__v4sf) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtps_epu64 (__m128 __A) {
   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
                 (__v4di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtps_epu64 (__mmask8 __U, __m128 __A) {
   return (__m256i) __builtin_ia32_cvtps2uqq256_mask ((__v4sf) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_cvtepi64_pd (__m128i __A) {
   return (__m128d)__builtin_convertvector((__v2di)__A, __v2df);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_cvtepi64_pd(__A),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepi64_pd (__mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_cvtepi64_pd(__A),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_cvtepi64_pd (__m256i __A) {
   return (__m256d)__builtin_convertvector((__v4di)__A, __v4df);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_cvtepi64_pd(__A),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepi64_pd (__mmask8 __U, __m256i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_cvtepi64_pd(__A),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_cvtepi64_ps (__m128i __A) {
   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
                 (__v4sf) _mm_setzero_ps(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
                 (__v4sf) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepi64_ps (__mmask8 __U, __m128i __A) {
   return (__m128) __builtin_ia32_cvtqq2ps128_mask ((__v2di) __A,
                 (__v4sf) _mm_setzero_ps(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS256
 _mm256_cvtepi64_ps (__m256i __A) {
   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
                 (__v4sf) _mm_setzero_ps(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
                 (__v4sf) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepi64_ps (__mmask8 __U, __m256i __A) {
   return (__m128) __builtin_ia32_cvtqq2ps256_mask ((__v4di) __A,
                 (__v4sf) _mm_setzero_ps(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvttpd_epi64 (__m128d __A) {
   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvttpd_epi64 (__m128i __W, __mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
                 (__v2di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvttpd_epi64 (__mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvttpd2qq128_mask ((__v2df) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvttpd_epi64 (__m256d __A) {
   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvttpd_epi64 (__m256i __W, __mmask8 __U, __m256d __A) {
   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
                 (__v4di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvttpd_epi64 (__mmask8 __U, __m256d __A) {
   return (__m256i) __builtin_ia32_cvttpd2qq256_mask ((__v4df) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvttpd_epu64 (__m128d __A) {
   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvttpd_epu64 (__m128i __W, __mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
                 (__v2di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvttpd_epu64 (__mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvttpd2uqq128_mask ((__v2df) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvttpd_epu64 (__m256d __A) {
   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvttpd_epu64 (__m256i __W, __mmask8 __U, __m256d __A) {
   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
                 (__v4di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvttpd_epu64 (__mmask8 __U, __m256d __A) {
   return (__m256i) __builtin_ia32_cvttpd2uqq256_mask ((__v4df) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvttps_epi64 (__m128 __A) {
   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvttps_epi64 (__m128i __W, __mmask8 __U, __m128 __A) {
   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
                 (__v2di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
   return (__m128i) __builtin_ia32_cvttps2qq128_mask ((__v4sf) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvttps_epi64 (__m128 __A) {
   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvttps_epi64 (__m256i __W, __mmask8 __U, __m128 __A) {
   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
                 (__v4di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvttps_epi64 (__mmask8 __U, __m128 __A) {
   return (__m256i) __builtin_ia32_cvttps2qq256_mask ((__v4sf) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvttps_epu64 (__m128 __A) {
   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvttps_epu64 (__m128i __W, __mmask8 __U, __m128 __A) {
   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
                 (__v2di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
   return (__m128i) __builtin_ia32_cvttps2uqq128_mask ((__v4sf) __A,
                 (__v2di) _mm_setzero_si128(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvttps_epu64 (__m128 __A) {
   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvttps_epu64 (__m256i __W, __mmask8 __U, __m128 __A) {
   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
                 (__v4di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvttps_epu64 (__mmask8 __U, __m128 __A) {
   return (__m256i) __builtin_ia32_cvttps2uqq256_mask ((__v4sf) __A,
                 (__v4di) _mm256_setzero_si256(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_cvtepu64_pd (__m128i __A) {
   return (__m128d)__builtin_convertvector((__v2du)__A, __v2df);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepu64_pd (__m128d __W, __mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_cvtepu64_pd(__A),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepu64_pd (__mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_cvtepu64_pd(__A),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_cvtepu64_pd (__m256i __A) {
   return (__m256d)__builtin_convertvector((__v4du)__A, __v4df);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepu64_pd (__m256d __W, __mmask8 __U, __m256i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_cvtepu64_pd(__A),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepu64_pd (__mmask8 __U, __m256i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_cvtepu64_pd(__A),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_cvtepu64_ps (__m128i __A) {
   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
                 (__v4sf) _mm_setzero_ps(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m128i __A) {
   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
                 (__v4sf) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepu64_ps (__mmask8 __U, __m128i __A) {
   return (__m128) __builtin_ia32_cvtuqq2ps128_mask ((__v2di) __A,
                 (__v4sf) _mm_setzero_ps(),
                 (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS256
 _mm256_cvtepu64_ps (__m256i __A) {
   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
                 (__v4sf) _mm_setzero_ps(),
                 (__mmask8) -1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepu64_ps (__m128 __W, __mmask8 __U, __m256i __A) {
   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
                 (__v4sf) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepu64_ps (__mmask8 __U, __m256i __A) {
   return (__m128) __builtin_ia32_cvtuqq2ps256_mask ((__v4di) __A,
                 (__v4sf) _mm_setzero_ps(),
@@ -919,62 +920,62 @@
                                           (__v8sf)_mm256_setzero_ps(), \
                                           (__mmask8)(U))
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_movepi32_mask (__m128i __A)
 {
   return (__mmask8) __builtin_ia32_cvtd2mask128 ((__v4si) __A);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
 _mm256_movepi32_mask (__m256i __A)
 {
   return (__mmask8) __builtin_ia32_cvtd2mask256 ((__v8si) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_movm_epi32 (__mmask8 __A)
 {
   return (__m128i) __builtin_ia32_cvtmask2d128 (__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_movm_epi32 (__mmask8 __A)
 {
   return (__m256i) __builtin_ia32_cvtmask2d256 (__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_movm_epi64 (__mmask8 __A)
 {
   return (__m128i) __builtin_ia32_cvtmask2q128 (__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_movm_epi64 (__mmask8 __A)
 {
   return (__m256i) __builtin_ia32_cvtmask2q256 (__A);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_movepi64_mask (__m128i __A)
 {
   return (__mmask8) __builtin_ia32_cvtq2mask128 ((__v2di) __A);
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
 _mm256_movepi64_mask (__m256i __A)
 {
   return (__mmask8) __builtin_ia32_cvtq2mask256 ((__v4di) __A);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_broadcast_f32x2 (__m128 __A)
 {
   return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                          0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_broadcast_f32x2 (__m256 __O, __mmask8 __M, __m128 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
@@ -982,7 +983,7 @@
                                              (__v8sf)__O);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_broadcast_f32x2 (__mmask8 __M, __m128 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
@@ -990,14 +991,14 @@
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_broadcast_f64x2(__m128d __A)
 {
   return (__m256d)__builtin_shufflevector((__v2df)__A, (__v2df)__A,
                                           0, 1, 0, 1);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_broadcast_f64x2(__m256d __O, __mmask8 __M, __m128d __A)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
@@ -1005,7 +1006,7 @@
                                             (__v4df)__O);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_broadcast_f64x2 (__mmask8 __M, __m128d __A)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__M,
@@ -1013,14 +1014,14 @@
                                             (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_broadcast_i32x2 (__m128i __A)
 {
   return (__m128i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                           0, 1, 0, 1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_broadcast_i32x2 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
@@ -1028,7 +1029,7 @@
                                              (__v4si)__O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
@@ -1036,14 +1037,14 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_broadcast_i32x2 (__m128i __A)
 {
   return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                           0, 1, 0, 1, 0, 1, 0, 1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_broadcast_i32x2 (__m256i __O, __mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
@@ -1051,7 +1052,7 @@
                                              (__v8si)__O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_broadcast_i32x2 (__mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
@@ -1059,14 +1060,14 @@
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_broadcast_i64x2(__m128i __A)
 {
   return (__m256i)__builtin_shufflevector((__v2di)__A, (__v2di)__A,
                                           0, 1, 0, 1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_broadcast_i64x2(__m256i __O, __mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -1074,7 +1075,7 @@
                                             (__v4di)__O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_broadcast_i64x2 (__mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -1178,6 +1179,7 @@
   (__mmask8)__builtin_ia32_fpclassps256_mask((__v8sf)(__m256)(A), (int)(imm), \
                                              (__mmask8)-1)
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif
Index: lib/Headers/avx512vlintrin.h
===================================================================
--- lib/Headers/avx512vlintrin.h
+++ lib/Headers/avx512vlintrin.h
@@ -28,7 +28,8 @@
 #ifndef __AVX512VLINTRIN_H
 #define __AVX512VLINTRIN_H
 
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl")))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl"), __min_vector_width__(256)))
 
 /* Integer compare */
 
@@ -232,7 +233,7 @@
 #define _mm256_mask_cmpneq_epu64_mask(k, A, B) \
     _mm256_mask_cmp_epu64_mask((k), (A), (B), _MM_CMPINT_NE)
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_add_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -240,7 +241,7 @@
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_add_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -248,7 +249,7 @@
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_add_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -256,7 +257,7 @@
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_add_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -264,7 +265,7 @@
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sub_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -272,7 +273,7 @@
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sub_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -280,7 +281,7 @@
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sub_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -288,7 +289,7 @@
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sub_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -296,7 +297,7 @@
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_add_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -304,7 +305,7 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_add_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -312,7 +313,7 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_add_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -320,7 +321,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_add_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -328,7 +329,7 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sub_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -336,7 +337,7 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sub_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -344,7 +345,7 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sub_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -352,7 +353,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sub_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -360,7 +361,7 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_mul_epi32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -368,7 +369,7 @@
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_mul_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -376,7 +377,7 @@
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_mul_epi32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
@@ -384,7 +385,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_mul_epi32(__mmask8 __M, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
@@ -392,7 +393,7 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_mul_epu32(__m256i __W, __mmask8 __M, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -400,7 +401,7 @@
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_mul_epu32(__mmask8 __M, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -408,7 +409,7 @@
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_mul_epu32(__m128i __W, __mmask8 __M, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
@@ -416,7 +417,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_mul_epu32(__mmask8 __M, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
@@ -424,7 +425,7 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_mullo_epi32(__mmask8 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
@@ -432,7 +433,7 @@
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_mullo_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
@@ -440,7 +441,7 @@
                                              (__v8si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_mullo_epi32(__mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
@@ -448,7 +449,7 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_mullo_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
@@ -456,7 +457,7 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_and_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -464,13 +465,13 @@
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_and_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)_mm256_mask_and_epi32(_mm256_setzero_si256(), __U, __A, __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_and_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -478,13 +479,13 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_and_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)_mm_mask_and_epi32(_mm_setzero_si128(), __U, __A, __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_andnot_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -492,14 +493,14 @@
                                           (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_andnot_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)_mm256_mask_andnot_epi32(_mm256_setzero_si256(),
                                            __U, __A, __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_andnot_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -507,13 +508,13 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_andnot_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)_mm_mask_andnot_epi32(_mm_setzero_si128(), __U, __A, __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_or_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -521,13 +522,13 @@
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_or_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)_mm256_mask_or_epi32(_mm256_setzero_si256(), __U, __A, __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_or_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -535,13 +536,13 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_or_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)_mm_mask_or_epi32(_mm_setzero_si128(), __U, __A, __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_xor_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -549,13 +550,13 @@
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_xor_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)_mm256_mask_xor_epi32(_mm256_setzero_si256(), __U, __A, __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_xor_epi32(__m128i __W, __mmask8 __U, __m128i __A,
         __m128i __B)
 {
@@ -564,13 +565,13 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_xor_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)_mm_mask_xor_epi32(_mm_setzero_si128(), __U, __A, __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_and_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -578,13 +579,13 @@
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_and_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)_mm256_mask_and_epi64(_mm256_setzero_si256(), __U, __A, __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_and_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -592,13 +593,13 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_and_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)_mm_mask_and_epi64(_mm_setzero_si128(), __U, __A, __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_andnot_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -606,14 +607,14 @@
                                           (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_andnot_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)_mm256_mask_andnot_epi64(_mm256_setzero_si256(),
                                            __U, __A, __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_andnot_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -621,13 +622,13 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_andnot_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)_mm_mask_andnot_epi64(_mm_setzero_si128(), __U, __A, __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_or_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -635,13 +636,13 @@
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_or_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)_mm256_mask_or_epi64(_mm256_setzero_si256(), __U, __A, __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_or_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -649,13 +650,13 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_or_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)_mm_mask_or_epi64(_mm_setzero_si128(), __U, __A, __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_xor_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -663,13 +664,13 @@
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_xor_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)_mm256_mask_xor_epi64(_mm256_setzero_si256(), __U, __A, __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_xor_epi64(__m128i __W, __mmask8 __U, __m128i __A,
         __m128i __B)
 {
@@ -678,7 +679,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_xor_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)_mm_mask_xor_epi64(_mm_setzero_si128(), __U, __A, __B);
@@ -804,7 +805,7 @@
                                          (__v2df)(__m128d)(b), (int)(p), \
                                          (__mmask8)(m))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_fmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -814,7 +815,7 @@
                     (__v2df) __A);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask3_fmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -824,7 +825,7 @@
                     (__v2df) __C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_fmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -834,7 +835,7 @@
                     (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_fmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -844,7 +845,7 @@
                     (__v2df) __A);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_fmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -854,7 +855,7 @@
                     (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask3_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -864,7 +865,7 @@
                     (__v2df) __C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_fnmadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -874,7 +875,7 @@
                     (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_fnmsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -884,7 +885,7 @@
                     (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_fmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -894,7 +895,7 @@
                     (__v4df) __A);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask3_fmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -904,7 +905,7 @@
                     (__v4df) __C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_fmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -914,7 +915,7 @@
                     (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_fmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -924,7 +925,7 @@
                     (__v4df) __A);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_fmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -934,7 +935,7 @@
                     (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask3_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -944,7 +945,7 @@
                     (__v4df) __C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_fnmadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -954,7 +955,7 @@
                     (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_fnmsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -964,7 +965,7 @@
                     (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_fmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -974,7 +975,7 @@
                     (__v4sf) __A);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask3_fmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -984,7 +985,7 @@
                     (__v4sf) __C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_fmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -994,7 +995,7 @@
                     (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_fmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1004,7 +1005,7 @@
                     (__v4sf) __A);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_fmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1014,7 +1015,7 @@
                     (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask3_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1024,7 +1025,7 @@
                     (__v4sf) __C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_fnmadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1034,7 +1035,7 @@
                     (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_fnmsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1044,7 +1045,7 @@
                     (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_fmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1054,7 +1055,7 @@
                     (__v8sf) __A);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask3_fmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1064,7 +1065,7 @@
                     (__v8sf) __C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_fmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1074,7 +1075,7 @@
                     (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_fmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1084,7 +1085,7 @@
                     (__v8sf) __A);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_fmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1094,7 +1095,7 @@
                     (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask3_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1104,7 +1105,7 @@
                     (__v8sf) __C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_fnmadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1114,7 +1115,7 @@
                     (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_fnmsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1124,7 +1125,7 @@
                     (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_fmaddsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -1134,7 +1135,7 @@
                     (__v2df) __A);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask3_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -1144,7 +1145,7 @@
                     (__v2df) __C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_fmaddsub_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -1154,7 +1155,7 @@
                     (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_fmsubadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -1164,7 +1165,7 @@
                     (__v2df) __A);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_fmsubadd_pd(__mmask8 __U, __m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -1174,7 +1175,7 @@
                     (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_fmaddsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -1184,7 +1185,7 @@
                     (__v4df) __A);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask3_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -1194,7 +1195,7 @@
                     (__v4df) __C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_fmaddsub_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -1204,7 +1205,7 @@
                     (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_fmsubadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -1214,7 +1215,7 @@
                     (__v4df) __A);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_fmsubadd_pd(__mmask8 __U, __m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -1224,7 +1225,7 @@
                     (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_fmaddsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1234,7 +1235,7 @@
                     (__v4sf) __A);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask3_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1244,7 +1245,7 @@
                     (__v4sf) __C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_fmaddsub_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1254,7 +1255,7 @@
                     (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_fmsubadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1264,7 +1265,7 @@
                     (__v4sf) __A);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_fmsubadd_ps(__mmask8 __U, __m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1274,7 +1275,7 @@
                     (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_fmaddsub_ps(__m256 __A, __mmask8 __U, __m256 __B,
                          __m256 __C)
 {
@@ -1285,7 +1286,7 @@
                     (__v8sf) __A);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask3_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1295,7 +1296,7 @@
                     (__v8sf) __C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_fmaddsub_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1305,7 +1306,7 @@
                     (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_fmsubadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1315,7 +1316,7 @@
                     (__v8sf) __A);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_fmsubadd_ps(__mmask8 __U, __m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1325,7 +1326,7 @@
                     (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask3_fmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -1335,7 +1336,7 @@
                     (__v2df) __C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask3_fmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -1345,7 +1346,7 @@
                     (__v4df) __C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask3_fmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1355,7 +1356,7 @@
                     (__v4sf) __C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask3_fmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1365,7 +1366,7 @@
                     (__v8sf) __C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask3_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -1375,7 +1376,7 @@
                     (__v2df) __C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask3_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -1385,7 +1386,7 @@
                     (__v4df) __C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask3_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1395,7 +1396,7 @@
                     (__v4sf) __C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask3_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1405,7 +1406,7 @@
                     (__v8sf) __C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_fnmadd_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -1415,7 +1416,7 @@
                     (__v2df) __A);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_fnmadd_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -1425,7 +1426,7 @@
                     (__v4df) __A);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_fnmadd_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1435,7 +1436,7 @@
                     (__v4sf) __A);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_fnmadd_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1445,7 +1446,7 @@
                     (__v8sf) __A);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_fnmsub_pd(__m128d __A, __mmask8 __U, __m128d __B, __m128d __C)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -1455,7 +1456,7 @@
                     (__v2df) __A);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask3_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C, __mmask8 __U)
 {
   return (__m128d) __builtin_ia32_selectpd_128((__mmask8) __U,
@@ -1465,7 +1466,7 @@
                     (__v2df) __C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_fnmsub_pd(__m256d __A, __mmask8 __U, __m256d __B, __m256d __C)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -1475,7 +1476,7 @@
                     (__v4df) __A);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask3_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C, __mmask8 __U)
 {
   return (__m256d) __builtin_ia32_selectpd_256((__mmask8) __U,
@@ -1485,7 +1486,7 @@
                     (__v4df) __C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_fnmsub_ps(__m128 __A, __mmask8 __U, __m128 __B, __m128 __C)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1495,7 +1496,7 @@
                     (__v4sf) __A);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask3_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C, __mmask8 __U)
 {
   return (__m128) __builtin_ia32_selectps_128((__mmask8) __U,
@@ -1505,7 +1506,7 @@
                     (__v4sf) __C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_fnmsub_ps(__m256 __A, __mmask8 __U, __m256 __B, __m256 __C)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1515,7 +1516,7 @@
                     (__v8sf) __A);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask3_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C, __mmask8 __U)
 {
   return (__m256) __builtin_ia32_selectps_256((__mmask8) __U,
@@ -1525,126 +1526,126 @@
                     (__v8sf) __C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_add_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_add_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_add_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_add_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_add_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_add_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_add_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_add_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_add_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_add_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_add_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_add_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_add_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_add_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_add_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_add_ps(__A, __B),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_blend_epi32 (__mmask8 __U, __m128i __A, __m128i __W) {
   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
                 (__v4si) __W,
                 (__v4si) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_blend_epi32 (__mmask8 __U, __m256i __A, __m256i __W) {
   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
                 (__v8si) __W,
                 (__v8si) __A);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_blend_pd (__mmask8 __U, __m128d __A, __m128d __W) {
   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
                  (__v2df) __W,
                  (__v2df) __A);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_blend_pd (__mmask8 __U, __m256d __A, __m256d __W) {
   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
                  (__v4df) __W,
                  (__v4df) __A);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_blend_ps (__mmask8 __U, __m128 __A, __m128 __W) {
   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
                 (__v4sf) __W,
                 (__v4sf) __A);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_blend_ps (__mmask8 __U, __m256 __A, __m256 __W) {
   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
                 (__v8sf) __W,
                 (__v8sf) __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_blend_epi64 (__mmask8 __U, __m128i __A, __m128i __W) {
   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
                 (__v2di) __W,
                 (__v2di) __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_blend_epi64 (__mmask8 __U, __m256i __A, __m256i __W) {
   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
                 (__v4di) __W,
                 (__v4di) __A);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_compress_pd (__m128d __W, __mmask8 __U, __m128d __A) {
   return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
                   (__v2df) __W,
                   (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_compress_pd (__mmask8 __U, __m128d __A) {
   return (__m128d) __builtin_ia32_compressdf128_mask ((__v2df) __A,
                   (__v2df)
@@ -1652,14 +1653,14 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_compress_pd (__m256d __W, __mmask8 __U, __m256d __A) {
   return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
                   (__v4df) __W,
                   (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_compress_pd (__mmask8 __U, __m256d __A) {
   return (__m256d) __builtin_ia32_compressdf256_mask ((__v4df) __A,
                   (__v4df)
@@ -1667,14 +1668,14 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_compress_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
                   (__v2di) __W,
                   (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_compress_epi64 (__mmask8 __U, __m128i __A) {
   return (__m128i) __builtin_ia32_compressdi128_mask ((__v2di) __A,
                   (__v2di)
@@ -1682,14 +1683,14 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_compress_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
   return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
                   (__v4di) __W,
                   (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_compress_epi64 (__mmask8 __U, __m256i __A) {
   return (__m256i) __builtin_ia32_compressdi256_mask ((__v4di) __A,
                   (__v4di)
@@ -1697,14 +1698,14 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_compress_ps (__m128 __W, __mmask8 __U, __m128 __A) {
   return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
                  (__v4sf) __W,
                  (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_compress_ps (__mmask8 __U, __m128 __A) {
   return (__m128) __builtin_ia32_compresssf128_mask ((__v4sf) __A,
                  (__v4sf)
@@ -1712,14 +1713,14 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_compress_ps (__m256 __W, __mmask8 __U, __m256 __A) {
   return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
                  (__v8sf) __W,
                  (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_compress_ps (__mmask8 __U, __m256 __A) {
   return (__m256) __builtin_ia32_compresssf256_mask ((__v8sf) __A,
                  (__v8sf)
@@ -1727,14 +1728,14 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_compress_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
   return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
                   (__v4si) __W,
                   (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_compress_epi32 (__mmask8 __U, __m128i __A) {
   return (__m128i) __builtin_ia32_compresssi128_mask ((__v4si) __A,
                   (__v4si)
@@ -1742,14 +1743,14 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_compress_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
   return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
                   (__v8si) __W,
                   (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_compress_epi32 (__mmask8 __U, __m256i __A) {
   return (__m256i) __builtin_ia32_compresssi256_mask ((__v8si) __A,
                   (__v8si)
@@ -1757,126 +1758,126 @@
                   (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m128d __A) {
   __builtin_ia32_compressstoredf128_mask ((__v2df *) __P,
             (__v2df) __A,
             (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_compressstoreu_pd (void *__P, __mmask8 __U, __m256d __A) {
   __builtin_ia32_compressstoredf256_mask ((__v4df *) __P,
             (__v4df) __A,
             (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m128i __A) {
   __builtin_ia32_compressstoredi128_mask ((__v2di *) __P,
             (__v2di) __A,
             (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_compressstoreu_epi64 (void *__P, __mmask8 __U, __m256i __A) {
   __builtin_ia32_compressstoredi256_mask ((__v4di *) __P,
             (__v4di) __A,
             (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m128 __A) {
   __builtin_ia32_compressstoresf128_mask ((__v4sf *) __P,
             (__v4sf) __A,
             (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_compressstoreu_ps (void *__P, __mmask8 __U, __m256 __A) {
   __builtin_ia32_compressstoresf256_mask ((__v8sf *) __P,
             (__v8sf) __A,
             (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m128i __A) {
   __builtin_ia32_compressstoresi128_mask ((__v4si *) __P,
             (__v4si) __A,
             (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_compressstoreu_epi32 (void *__P, __mmask8 __U, __m256i __A) {
   __builtin_ia32_compressstoresi256_mask ((__v8si *) __P,
             (__v8si) __A,
             (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
                                               (__v2df)_mm_cvtepi32_pd(__A),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
                                               (__v2df)_mm_cvtepi32_pd(__A),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
                                               (__v4df)_mm256_cvtepi32_pd(__A),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepi32_pd (__mmask8 __U, __m128i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
                                               (__v4df)_mm256_cvtepi32_pd(__A),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_cvtepi32_ps(__A),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepi32_ps (__mmask8 __U, __m128i __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_cvtepi32_ps(__A),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_cvtepi32_ps(__A),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepi32_ps (__mmask8 __U, __m256i __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_cvtepi32_ps(__A),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
                 (__v4si) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtpd_epi32 (__mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvtpd2dq128_mask ((__v2df) __A,
                 (__v4si)
@@ -1884,28 +1885,28 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm256_cvtpd_epi32(__A),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtpd_epi32 (__mmask8 __U, __m256d __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm256_cvtpd_epi32(__A),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m128d __A) {
   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
             (__v4sf) __W,
             (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtpd_ps (__mmask8 __U, __m128d __A) {
   return (__m128) __builtin_ia32_cvtpd2ps_mask ((__v2df) __A,
             (__v4sf)
@@ -1913,21 +1914,21 @@
             (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtpd_ps (__m128 __W, __mmask8 __U, __m256d __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm256_cvtpd_ps(__A),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtpd_ps (__mmask8 __U, __m256d __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm256_cvtpd_ps(__A),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtpd_epu32 (__m128d __A) {
   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
                  (__v4si)
@@ -1935,14 +1936,14 @@
                  (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
                  (__v4si) __W,
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtpd_epu32 (__mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvtpd2udq128_mask ((__v2df) __A,
                  (__v4si)
@@ -1950,7 +1951,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtpd_epu32 (__m256d __A) {
   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
                  (__v4si)
@@ -1958,14 +1959,14 @@
                  (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
                  (__v4si) __W,
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtpd_epu32 (__mmask8 __U, __m256d __A) {
   return (__m128i) __builtin_ia32_cvtpd2udq256_mask ((__v4df) __A,
                  (__v4si)
@@ -1973,63 +1974,63 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_cvtps_epi32(__A),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtps_epi32 (__mmask8 __U, __m128 __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_cvtps_epi32(__A),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_cvtps_epi32(__A),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtps_epi32 (__mmask8 __U, __m256 __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_cvtps_epi32(__A),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_cvtps_pd (__m128d __W, __mmask8 __U, __m128 __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_cvtps_pd(__A),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_cvtps_pd(__A),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtps_pd (__m256d __W, __mmask8 __U, __m128 __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_cvtps_pd(__A),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtps_pd (__mmask8 __U, __m128 __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_cvtps_pd(__A),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtps_epu32 (__m128 __A) {
   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
                  (__v4si)
@@ -2037,14 +2038,14 @@
                  (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
                  (__v4si) __W,
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtps_epu32 (__mmask8 __U, __m128 __A) {
   return (__m128i) __builtin_ia32_cvtps2udq128_mask ((__v4sf) __A,
                  (__v4si)
@@ -2052,7 +2053,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvtps_epu32 (__m256 __A) {
   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
                  (__v8si)
@@ -2060,14 +2061,14 @@
                  (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
                  (__v8si) __W,
                  (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtps_epu32 (__mmask8 __U, __m256 __A) {
   return (__m256i) __builtin_ia32_cvtps2udq256_mask ((__v8sf) __A,
                  (__v8si)
@@ -2075,14 +2076,14 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
                  (__v4si) __W,
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvttpd_epi32 (__mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvttpd2dq128_mask ((__v2df) __A,
                  (__v4si)
@@ -2090,21 +2091,21 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvttpd_epi32 (__m128i __W, __mmask8 __U, __m256d __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm256_cvttpd_epi32(__A),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvttpd_epi32 (__mmask8 __U, __m256d __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm256_cvttpd_epi32(__A),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvttpd_epu32 (__m128d __A) {
   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
                   (__v4si)
@@ -2112,14 +2113,14 @@
                   (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
                   (__v4si) __W,
                   (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvttpd_epu32 (__mmask8 __U, __m128d __A) {
   return (__m128i) __builtin_ia32_cvttpd2udq128_mask ((__v2df) __A,
                   (__v4si)
@@ -2127,7 +2128,7 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvttpd_epu32 (__m256d __A) {
   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
                   (__v4si)
@@ -2135,14 +2136,14 @@
                   (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvttpd_epu32 (__m128i __W, __mmask8 __U, __m256d __A) {
   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
                   (__v4si) __W,
                   (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvttpd_epu32 (__mmask8 __U, __m256d __A) {
   return (__m128i) __builtin_ia32_cvttpd2udq256_mask ((__v4df) __A,
                   (__v4si)
@@ -2150,35 +2151,35 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvttps_epi32 (__m128i __W, __mmask8 __U, __m128 __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_cvttps_epi32(__A),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvttps_epi32 (__mmask8 __U, __m128 __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_cvttps_epi32(__A),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvttps_epi32 (__m256i __W, __mmask8 __U, __m256 __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_cvttps_epi32(__A),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvttps_epi32 (__mmask8 __U, __m256 __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_cvttps_epi32(__A),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvttps_epu32 (__m128 __A) {
   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
                   (__v4si)
@@ -2186,14 +2187,14 @@
                   (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvttps_epu32 (__m128i __W, __mmask8 __U, __m128 __A) {
   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
                   (__v4si) __W,
                   (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvttps_epu32 (__mmask8 __U, __m128 __A) {
   return (__m128i) __builtin_ia32_cvttps2udq128_mask ((__v4sf) __A,
                   (__v4si)
@@ -2201,7 +2202,7 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cvttps_epu32 (__m256 __A) {
   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
                   (__v8si)
@@ -2209,14 +2210,14 @@
                   (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvttps_epu32 (__m256i __W, __mmask8 __U, __m256 __A) {
   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
                   (__v8si) __W,
                   (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvttps_epu32 (__mmask8 __U, __m256 __A) {
   return (__m256i) __builtin_ia32_cvttps2udq256_mask ((__v8sf) __A,
                   (__v8si)
@@ -2224,147 +2225,147 @@
                   (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_cvtepu32_pd (__m128i __A) {
   return (__m128d) __builtin_convertvector(
       __builtin_shufflevector((__v4su)__A, (__v4su)__A, 0, 1), __v2df);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepu32_pd (__m128d __W, __mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
                                               (__v2df)_mm_cvtepu32_pd(__A),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8) __U,
                                               (__v2df)_mm_cvtepu32_pd(__A),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_cvtepu32_pd (__m128i __A) {
   return (__m256d)__builtin_convertvector((__v4su)__A, __v4df);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepu32_pd (__m256d __W, __mmask8 __U, __m128i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
                                               (__v4df)_mm256_cvtepu32_pd(__A),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepu32_pd (__mmask8 __U, __m128i __A) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8) __U,
                                               (__v4df)_mm256_cvtepu32_pd(__A),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_cvtepu32_ps (__m128i __A) {
   return (__m128)__builtin_convertvector((__v4su)__A, __v4sf);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepu32_ps (__m128 __W, __mmask8 __U, __m128i __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_cvtepu32_ps(__A),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepu32_ps (__mmask8 __U, __m128i __A) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_cvtepu32_ps(__A),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_cvtepu32_ps (__m256i __A) {
   return (__m256)__builtin_convertvector((__v8su)__A, __v8sf);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepu32_ps (__m256 __W, __mmask8 __U, __m256i __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_cvtepu32_ps(__A),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepu32_ps (__mmask8 __U, __m256i __A) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_cvtepu32_ps(__A),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_div_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_div_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_div_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_div_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_div_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_div_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_div_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_div_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_div_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_div_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_div_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_div_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_div_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_div_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_div_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_div_ps(__A, __B),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_expand_pd (__m128d __W, __mmask8 __U, __m128d __A) {
   return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
                 (__v2df) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_expand_pd (__mmask8 __U, __m128d __A) {
   return (__m128d) __builtin_ia32_expanddf128_mask ((__v2df) __A,
                  (__v2df)
@@ -2372,14 +2373,14 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_expand_pd (__m256d __W, __mmask8 __U, __m256d __A) {
   return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
                 (__v4df) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_expand_pd (__mmask8 __U, __m256d __A) {
   return (__m256d) __builtin_ia32_expanddf256_mask ((__v4df) __A,
                  (__v4df)
@@ -2387,14 +2388,14 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_expand_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
   return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
                 (__v2di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_expand_epi64 (__mmask8 __U, __m128i __A) {
   return (__m128i) __builtin_ia32_expanddi128_mask ((__v2di) __A,
                  (__v2di)
@@ -2402,14 +2403,14 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_expand_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
   return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
                 (__v4di) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_expand_epi64 (__mmask8 __U, __m256i __A) {
   return (__m256i) __builtin_ia32_expanddi256_mask ((__v4di) __A,
                  (__v4di)
@@ -2417,7 +2418,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_expandloadu_pd (__m128d __W, __mmask8 __U, void const *__P) {
   return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
               (__v2df) __W,
@@ -2425,7 +2426,7 @@
               __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
   return (__m128d) __builtin_ia32_expandloaddf128_mask ((__v2df *) __P,
                (__v2df)
@@ -2434,7 +2435,7 @@
                __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_expandloadu_pd (__m256d __W, __mmask8 __U, void const *__P) {
   return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
               (__v4df) __W,
@@ -2442,7 +2443,7 @@
               __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_expandloadu_pd (__mmask8 __U, void const *__P) {
   return (__m256d) __builtin_ia32_expandloaddf256_mask ((__v4df *) __P,
                (__v4df)
@@ -2451,7 +2452,7 @@
                __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_expandloadu_epi64 (__m128i __W, __mmask8 __U, void const *__P) {
   return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
               (__v2di) __W,
@@ -2459,7 +2460,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
   return (__m128i) __builtin_ia32_expandloaddi128_mask ((__v2di *) __P,
                (__v2di)
@@ -2468,7 +2469,7 @@
                __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_expandloadu_epi64 (__m256i __W, __mmask8 __U,
              void const *__P) {
   return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
@@ -2477,7 +2478,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_expandloadu_epi64 (__mmask8 __U, void const *__P) {
   return (__m256i) __builtin_ia32_expandloaddi256_mask ((__v4di *) __P,
                (__v4di)
@@ -2486,14 +2487,14 @@
                __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_expandloadu_ps (__m128 __W, __mmask8 __U, void const *__P) {
   return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
                    (__v4sf) __W,
                    (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
   return (__m128) __builtin_ia32_expandloadsf128_mask ((__v4sf *) __P,
               (__v4sf)
@@ -2502,14 +2503,14 @@
               __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_expandloadu_ps (__m256 __W, __mmask8 __U, void const *__P) {
   return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
                    (__v8sf) __W,
                    (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_expandloadu_ps (__mmask8 __U, void const *__P) {
   return (__m256) __builtin_ia32_expandloadsf256_mask ((__v8sf *) __P,
               (__v8sf)
@@ -2518,7 +2519,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_expandloadu_epi32 (__m128i __W, __mmask8 __U, void const *__P) {
   return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
               (__v4si) __W,
@@ -2526,7 +2527,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
   return (__m128i) __builtin_ia32_expandloadsi128_mask ((__v4si *) __P,
                (__v4si)
@@ -2534,7 +2535,7 @@
                (__mmask8)     __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_expandloadu_epi32 (__m256i __W, __mmask8 __U,
              void const *__P) {
   return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
@@ -2543,7 +2544,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_expandloadu_epi32 (__mmask8 __U, void const *__P) {
   return (__m256i) __builtin_ia32_expandloadsi256_mask ((__v8si *) __P,
                (__v8si)
@@ -2552,14 +2553,14 @@
                __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_expand_ps (__m128 __W, __mmask8 __U, __m128 __A) {
   return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
                (__v4sf) __W,
                (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_expand_ps (__mmask8 __U, __m128 __A) {
   return (__m128) __builtin_ia32_expandsf128_mask ((__v4sf) __A,
                 (__v4sf)
@@ -2567,14 +2568,14 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_expand_ps (__m256 __W, __mmask8 __U, __m256 __A) {
   return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
                (__v8sf) __W,
                (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_expand_ps (__mmask8 __U, __m256 __A) {
   return (__m256) __builtin_ia32_expandsf256_mask ((__v8sf) __A,
                 (__v8sf)
@@ -2582,14 +2583,14 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_expand_epi32 (__m128i __W, __mmask8 __U, __m128i __A) {
   return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
                 (__v4si) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_expand_epi32 (__mmask8 __U, __m128i __A) {
   return (__m128i) __builtin_ia32_expandsi128_mask ((__v4si) __A,
                  (__v4si)
@@ -2597,14 +2598,14 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_expand_epi32 (__m256i __W, __mmask8 __U, __m256i __A) {
   return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
                 (__v8si) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_expand_epi32 (__mmask8 __U, __m256i __A) {
   return (__m256i) __builtin_ia32_expandsi256_mask ((__v8si) __A,
                  (__v8si)
@@ -2612,7 +2613,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_getexp_pd (__m128d __A) {
   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
                 (__v2df)
@@ -2620,14 +2621,14 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_getexp_pd (__m128d __W, __mmask8 __U, __m128d __A) {
   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
                 (__v2df) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_getexp_pd (__mmask8 __U, __m128d __A) {
   return (__m128d) __builtin_ia32_getexppd128_mask ((__v2df) __A,
                 (__v2df)
@@ -2635,7 +2636,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_getexp_pd (__m256d __A) {
   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
                 (__v4df)
@@ -2643,14 +2644,14 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_getexp_pd (__m256d __W, __mmask8 __U, __m256d __A) {
   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
                 (__v4df) __W,
                 (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_getexp_pd (__mmask8 __U, __m256d __A) {
   return (__m256d) __builtin_ia32_getexppd256_mask ((__v4df) __A,
                 (__v4df)
@@ -2658,7 +2659,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_getexp_ps (__m128 __A) {
   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
                (__v4sf)
@@ -2666,14 +2667,14 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_getexp_ps (__m128 __W, __mmask8 __U, __m128 __A) {
   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
                (__v4sf) __W,
                (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_getexp_ps (__mmask8 __U, __m128 __A) {
   return (__m128) __builtin_ia32_getexpps128_mask ((__v4sf) __A,
                (__v4sf)
@@ -2681,7 +2682,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_getexp_ps (__m256 __A) {
   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
                (__v8sf)
@@ -2689,14 +2690,14 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_getexp_ps (__m256 __W, __mmask8 __U, __m256 __A) {
   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
                (__v8sf) __W,
                (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_getexp_ps (__mmask8 __U, __m256 __A) {
   return (__m256) __builtin_ia32_getexpps256_mask ((__v8sf) __A,
                (__v8sf)
@@ -2704,498 +2705,498 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_max_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_max_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_max_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_max_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_max_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_max_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_max_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_max_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_max_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_max_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_max_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_max_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_max_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_max_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_max_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_max_ps(__A, __B),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_min_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_min_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_min_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_min_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_min_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_min_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_min_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_min_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_min_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_min_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_min_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_min_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_min_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_min_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_min_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_min_ps(__A, __B),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_mul_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_mul_pd(__A, __B),
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_mul_pd(__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                               (__v2df)_mm_mul_pd(__A, __B),
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_mul_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_mul_pd(__A, __B),
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_mul_pd(__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                               (__v4df)_mm256_mul_pd(__A, __B),
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_mul_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_mul_ps(__A, __B),
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_mul_ps(__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                              (__v4sf)_mm_mul_ps(__A, __B),
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_mul_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_mul_ps(__A, __B),
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_mul_ps(__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                              (__v8sf)_mm256_mul_ps(__A, __B),
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_abs_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_abs_epi32(__A),
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_abs_epi32(__mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
                                              (__v4si)_mm_abs_epi32(__A),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_abs_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_abs_epi32(__A),
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_abs_epi32(__mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
                                              (__v8si)_mm256_abs_epi32(__A),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_abs_epi64 (__m128i __A) {
   return (__m128i)__builtin_ia32_pabsq128((__v2di)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_abs_epi64 (__m128i __W, __mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_abs_epi64(__A),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_abs_epi64 (__mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
                                              (__v2di)_mm_abs_epi64(__A),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_abs_epi64 (__m256i __A) {
   return (__m256i)__builtin_ia32_pabsq256 ((__v4di)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_abs_epi64 (__m256i __W, __mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_abs_epi64(__A),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_abs_epi64 (__mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
                                              (__v4di)_mm256_abs_epi64(__A),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_max_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_max_epi32(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_max_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_max_epi32(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_max_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_max_epi32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_max_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_max_epi32(__A, __B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_max_epi64 (__m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_pmaxsq128((__v2di)__A, (__v2di)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_max_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_max_epi64(__A, __B),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_max_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_max_epi64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epi64 (__m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_pmaxsq256((__v4di)__A, (__v4di)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_max_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_max_epi64(__A, __B),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_max_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_max_epi64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_max_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_max_epu32(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_max_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_max_epu32(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_max_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_max_epu32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_max_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_max_epu32(__A, __B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_max_epu64 (__m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_pmaxuq128((__v2di)__A, (__v2di)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_max_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_max_epu64(__A, __B),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_max_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_max_epu64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_max_epu64 (__m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_pmaxuq256((__v4di)__A, (__v4di)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_max_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_max_epu64(__A, __B),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_max_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_max_epu64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_min_epi32(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_min_epi32(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_min_epi32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_min_epi32(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_min_epi32(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_min_epi32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_min_epi32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_min_epi32(__A, __B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_min_epi64 (__m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_pminsq128((__v2di)__A, (__v2di)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_min_epi64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_min_epi64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_min_epi64 (__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_min_epi64(__A, __B),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epi64 (__m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_pminsq256((__v4di)__A, (__v4di)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_min_epi64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_min_epi64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_min_epi64 (__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_min_epi64(__A, __B),
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_min_epu32(__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_min_epu32(__A, __B),
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_min_epu32(__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
                                              (__v4si)_mm_min_epu32(__A, __B),
                                              (__v4si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_min_epu32(__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_min_epu32(__A, __B),
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_min_epu32(__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
                                              (__v8si)_mm256_min_epu32(__A, __B),
                                              (__v8si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_min_epu64 (__m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_pminuq128((__v2di)__A, (__v2di)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_min_epu64 (__m128i __W, __mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_min_epu64(__A, __B),
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_min_epu64 (__mmask8 __M, __m128i __A, __m128i __B) {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__M,
                                              (__v2di)_mm_min_epu64(__A, __B),
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_min_epu64 (__m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_pminuq256((__v4di)__A, (__v4di)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_min_epu64 (__m256i __W, __mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_min_epu64(__A, __B),
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_min_epu64 (__mmask8 __M, __m256i __A, __m256i __B) {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
                                              (__v4di)_mm256_min_epu64(__A, __B),
@@ -3276,7 +3277,7 @@
                                              (__v8sf)_mm256_setzero_ps(), \
                                              (__mmask8)(U))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_scalef_pd (__m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
                 (__v2df) __B,
@@ -3285,7 +3286,7 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_scalef_pd (__m128d __W, __mmask8 __U, __m128d __A,
         __m128d __B) {
   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
@@ -3294,7 +3295,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_scalef_pd (__mmask8 __U, __m128d __A, __m128d __B) {
   return (__m128d) __builtin_ia32_scalefpd128_mask ((__v2df) __A,
                 (__v2df) __B,
@@ -3303,7 +3304,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_scalef_pd (__m256d __A, __m256d __B) {
   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
                 (__v4df) __B,
@@ -3312,7 +3313,7 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_scalef_pd (__m256d __W, __mmask8 __U, __m256d __A,
            __m256d __B) {
   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
@@ -3321,7 +3322,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_scalef_pd (__mmask8 __U, __m256d __A, __m256d __B) {
   return (__m256d) __builtin_ia32_scalefpd256_mask ((__v4df) __A,
                 (__v4df) __B,
@@ -3330,7 +3331,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_scalef_ps (__m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
                (__v4sf) __B,
@@ -3339,7 +3340,7 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_scalef_ps (__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
                (__v4sf) __B,
@@ -3347,7 +3348,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_scalef_ps (__mmask8 __U, __m128 __A, __m128 __B) {
   return (__m128) __builtin_ia32_scalefps128_mask ((__v4sf) __A,
                (__v4sf) __B,
@@ -3356,7 +3357,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_scalef_ps (__m256 __A, __m256 __B) {
   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
                (__v8sf) __B,
@@ -3365,7 +3366,7 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_scalef_ps (__m256 __W, __mmask8 __U, __m256 __A,
            __m256 __B) {
   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
@@ -3374,7 +3375,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_scalef_ps (__mmask8 __U, __m256 __A, __m256 __B) {
   return (__m256) __builtin_ia32_scalefps256_mask ((__v8sf) __A,
                (__v8sf) __B,
@@ -3543,125 +3544,125 @@
                                  (__v8si)(__m256i)(index), \
                                  (__v8si)(__m256i)(v1), (int)(scale))
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128
   _mm_mask_sqrt_pd(__m128d __W, __mmask8 __U, __m128d __A) {
     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                                 (__v2df)_mm_sqrt_pd(__A),
                                                 (__v2df)__W);
   }
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128
   _mm_maskz_sqrt_pd(__mmask8 __U, __m128d __A) {
     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                                 (__v2df)_mm_sqrt_pd(__A),
                                                 (__v2df)_mm_setzero_pd());
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256
   _mm256_mask_sqrt_pd(__m256d __W, __mmask8 __U, __m256d __A) {
     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                                 (__v4df)_mm256_sqrt_pd(__A),
                                                 (__v4df)__W);
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256
   _mm256_maskz_sqrt_pd(__mmask8 __U, __m256d __A) {
     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                                 (__v4df)_mm256_sqrt_pd(__A),
                                                 (__v4df)_mm256_setzero_pd());
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128
   _mm_mask_sqrt_ps(__m128 __W, __mmask8 __U, __m128 __A) {
     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                                (__v4sf)_mm_sqrt_ps(__A),
                                                (__v4sf)__W);
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128
   _mm_maskz_sqrt_ps(__mmask8 __U, __m128 __A) {
     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                                (__v4sf)_mm_sqrt_ps(__A),
                                                (__v4sf)_mm_setzero_pd());
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256
   _mm256_mask_sqrt_ps(__m256 __W, __mmask8 __U, __m256 __A) {
     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                                (__v8sf)_mm256_sqrt_ps(__A),
                                                (__v8sf)__W);
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256
   _mm256_maskz_sqrt_ps(__mmask8 __U, __m256 __A) {
     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                                (__v8sf)_mm256_sqrt_ps(__A),
                                                (__v8sf)_mm256_setzero_ps());
   }
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128
   _mm_mask_sub_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B) {
     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                                 (__v2df)_mm_sub_pd(__A, __B),
                                                 (__v2df)__W);
   }
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128
   _mm_maskz_sub_pd(__mmask8 __U, __m128d __A, __m128d __B) {
     return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
                                                 (__v2df)_mm_sub_pd(__A, __B),
                                                 (__v2df)_mm_setzero_pd());
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256
   _mm256_mask_sub_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B) {
     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                                 (__v4df)_mm256_sub_pd(__A, __B),
                                                 (__v4df)__W);
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256
   _mm256_maskz_sub_pd(__mmask8 __U, __m256d __A, __m256d __B) {
     return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
                                                 (__v4df)_mm256_sub_pd(__A, __B),
                                                 (__v4df)_mm256_setzero_pd());
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128
   _mm_mask_sub_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B) {
     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                                (__v4sf)_mm_sub_ps(__A, __B),
                                                (__v4sf)__W);
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128
   _mm_maskz_sub_ps(__mmask8 __U, __m128 __A, __m128 __B) {
     return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
                                                (__v4sf)_mm_sub_ps(__A, __B),
                                                (__v4sf)_mm_setzero_ps());
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256
   _mm256_mask_sub_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B) {
     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                                (__v8sf)_mm256_sub_ps(__A, __B),
                                                (__v8sf)__W);
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256
   _mm256_maskz_sub_ps(__mmask8 __U, __m256 __A, __m256 __B) {
     return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
                                                (__v8sf)_mm256_sub_ps(__A, __B),
                                                (__v8sf)_mm256_setzero_ps());
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_permutex2var_epi32(__m128i __A, __m128i __I, __m128i __B) {
     return (__m128i)__builtin_ia32_vpermi2vard128((__v4si) __A, (__v4si)__I,
                                                   (__v4si)__B);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask_permutex2var_epi32(__m128i __A, __mmask8 __U, __m128i __I,
                               __m128i __B) {
     return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -3669,7 +3670,7 @@
                                     (__v4si)__A);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask2_permutex2var_epi32(__m128i __A, __m128i __I, __mmask8 __U,
                                __m128i __B) {
     return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -3677,7 +3678,7 @@
                                     (__v4si)__I);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_maskz_permutex2var_epi32(__mmask8 __U, __m128i __A, __m128i __I,
                                __m128i __B) {
     return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -3685,13 +3686,13 @@
                                     (__v4si)_mm_setzero_si128());
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_permutex2var_epi32(__m256i __A, __m256i __I, __m256i __B) {
     return (__m256i)__builtin_ia32_vpermi2vard256((__v8si)__A, (__v8si) __I,
                                                   (__v8si) __B);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask_permutex2var_epi32(__m256i __A, __mmask8 __U, __m256i __I,
                                  __m256i __B) {
     return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -3699,7 +3700,7 @@
                                  (__v8si)__A);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask2_permutex2var_epi32(__m256i __A, __m256i __I, __mmask8 __U,
                                   __m256i __B) {
     return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -3707,7 +3708,7 @@
                                  (__v8si)__I);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_maskz_permutex2var_epi32(__mmask8 __U, __m256i __A, __m256i __I,
                                   __m256i __B) {
     return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -3715,40 +3716,40 @@
                                  (__v8si)_mm256_setzero_si256());
   }
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128
   _mm_permutex2var_pd(__m128d __A, __m128i __I, __m128d __B) {
     return (__m128d)__builtin_ia32_vpermi2varpd128((__v2df)__A, (__v2di)__I,
                                                    (__v2df)__B);
   }
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128
   _mm_mask_permutex2var_pd(__m128d __A, __mmask8 __U, __m128i __I, __m128d __B) {
     return (__m128d)__builtin_ia32_selectpd_128(__U,
                                        (__v2df)_mm_permutex2var_pd(__A, __I, __B),
                                        (__v2df)__A);
   }
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128
   _mm_mask2_permutex2var_pd(__m128d __A, __m128i __I, __mmask8 __U, __m128d __B) {
     return (__m128d)__builtin_ia32_selectpd_128(__U,
                                        (__v2df)_mm_permutex2var_pd(__A, __I, __B),
                                        (__v2df)(__m128d)__I);
   }
 
-  static __inline__ __m128d __DEFAULT_FN_ATTRS
+  static __inline__ __m128d __DEFAULT_FN_ATTRS128
   _mm_maskz_permutex2var_pd(__mmask8 __U, __m128d __A, __m128i __I, __m128d __B) {
     return (__m128d)__builtin_ia32_selectpd_128(__U,
                                        (__v2df)_mm_permutex2var_pd(__A, __I, __B),
                                        (__v2df)_mm_setzero_pd());
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256
   _mm256_permutex2var_pd(__m256d __A, __m256i __I, __m256d __B) {
     return (__m256d)__builtin_ia32_vpermi2varpd256((__v4df)__A, (__v4di)__I,
                                                    (__v4df)__B);
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256
   _mm256_mask_permutex2var_pd(__m256d __A, __mmask8 __U, __m256i __I,
                               __m256d __B) {
     return (__m256d)__builtin_ia32_selectpd_256(__U,
@@ -3756,7 +3757,7 @@
                                     (__v4df)__A);
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256
   _mm256_mask2_permutex2var_pd(__m256d __A, __m256i __I, __mmask8 __U,
                                __m256d __B) {
     return (__m256d)__builtin_ia32_selectpd_256(__U,
@@ -3764,7 +3765,7 @@
                                     (__v4df)(__m256d)__I);
   }
 
-  static __inline__ __m256d __DEFAULT_FN_ATTRS
+  static __inline__ __m256d __DEFAULT_FN_ATTRS256
   _mm256_maskz_permutex2var_pd(__mmask8 __U, __m256d __A, __m256i __I,
                                __m256d __B) {
     return (__m256d)__builtin_ia32_selectpd_256(__U,
@@ -3772,47 +3773,47 @@
                                     (__v4df)_mm256_setzero_pd());
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128
   _mm_permutex2var_ps(__m128 __A, __m128i __I, __m128 __B) {
     return (__m128)__builtin_ia32_vpermi2varps128((__v4sf)__A, (__v4si)__I,
                                                   (__v4sf)__B);
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128
   _mm_mask_permutex2var_ps(__m128 __A, __mmask8 __U, __m128i __I, __m128 __B) {
     return (__m128)__builtin_ia32_selectps_128(__U,
                                        (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
                                        (__v4sf)__A);
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128
   _mm_mask2_permutex2var_ps(__m128 __A, __m128i __I, __mmask8 __U, __m128 __B) {
     return (__m128)__builtin_ia32_selectps_128(__U,
                                        (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
                                        (__v4sf)(__m128)__I);
   }
 
-  static __inline__ __m128 __DEFAULT_FN_ATTRS
+  static __inline__ __m128 __DEFAULT_FN_ATTRS128
   _mm_maskz_permutex2var_ps(__mmask8 __U, __m128 __A, __m128i __I, __m128 __B) {
     return (__m128)__builtin_ia32_selectps_128(__U,
                                        (__v4sf)_mm_permutex2var_ps(__A, __I, __B),
                                        (__v4sf)_mm_setzero_ps());
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256
   _mm256_permutex2var_ps(__m256 __A, __m256i __I, __m256 __B) {
     return (__m256)__builtin_ia32_vpermi2varps256((__v8sf)__A, (__v8si)__I,
                                                   (__v8sf) __B);
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256
   _mm256_mask_permutex2var_ps(__m256 __A, __mmask8 __U, __m256i __I, __m256 __B) {
     return (__m256)__builtin_ia32_selectps_256(__U,
                                     (__v8sf)_mm256_permutex2var_ps(__A, __I, __B),
                                     (__v8sf)__A);
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256
   _mm256_mask2_permutex2var_ps(__m256 __A, __m256i __I, __mmask8 __U,
                                __m256 __B) {
     return (__m256)__builtin_ia32_selectps_256(__U,
@@ -3820,7 +3821,7 @@
                                     (__v8sf)(__m256)__I);
   }
 
-  static __inline__ __m256 __DEFAULT_FN_ATTRS
+  static __inline__ __m256 __DEFAULT_FN_ATTRS256
   _mm256_maskz_permutex2var_ps(__mmask8 __U, __m256 __A, __m256i __I,
                                __m256 __B) {
     return (__m256)__builtin_ia32_selectps_256(__U,
@@ -3828,13 +3829,13 @@
                                     (__v8sf)_mm256_setzero_ps());
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_permutex2var_epi64(__m128i __A, __m128i __I, __m128i __B) {
     return (__m128i)__builtin_ia32_vpermi2varq128((__v2di)__A, (__v2di)__I,
                                                   (__v2di)__B);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask_permutex2var_epi64(__m128i __A, __mmask8 __U, __m128i __I,
                               __m128i __B) {
     return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -3842,7 +3843,7 @@
                                     (__v2di)__A);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask2_permutex2var_epi64(__m128i __A, __m128i __I, __mmask8 __U,
                                __m128i __B) {
     return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -3850,7 +3851,7 @@
                                     (__v2di)__I);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_maskz_permutex2var_epi64(__mmask8 __U, __m128i __A, __m128i __I,
                                __m128i __B) {
     return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -3859,13 +3860,13 @@
   }
 
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_permutex2var_epi64(__m256i __A, __m256i __I, __m256i __B) {
     return (__m256i)__builtin_ia32_vpermi2varq256((__v4di)__A, (__v4di) __I,
                                                   (__v4di) __B);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask_permutex2var_epi64(__m256i __A, __mmask8 __U, __m256i __I,
                                  __m256i __B) {
     return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -3873,7 +3874,7 @@
                                  (__v4di)__A);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask2_permutex2var_epi64(__m256i __A, __m256i __I, __mmask8 __U,
                                   __m256i __B) {
     return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -3881,7 +3882,7 @@
                                  (__v4di)__I);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_maskz_permutex2var_epi64(__mmask8 __U, __m256i __A, __m256i __I,
                                   __m256i __B) {
     return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -3889,7 +3890,7 @@
                                  (__v4di)_mm256_setzero_si256());
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask_cvtepi8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -3897,7 +3898,7 @@
                                                (__v4si)__W);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_maskz_cvtepi8_epi32(__mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -3905,7 +3906,7 @@
                                                (__v4si)_mm_setzero_si128());
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask_cvtepi8_epi32 (__m256i __W, __mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -3913,7 +3914,7 @@
                                                (__v8si)__W);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_maskz_cvtepi8_epi32 (__mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -3921,7 +3922,7 @@
                                                (__v8si)_mm256_setzero_si256());
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask_cvtepi8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -3929,7 +3930,7 @@
                                                (__v2di)__W);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -3937,7 +3938,7 @@
                                                (__v2di)_mm_setzero_si128());
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask_cvtepi8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -3945,7 +3946,7 @@
                                                (__v4di)__W);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_maskz_cvtepi8_epi64(__mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -3953,7 +3954,7 @@
                                                (__v4di)_mm256_setzero_si256());
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask_cvtepi32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
   {
     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -3961,7 +3962,7 @@
                                                (__v2di)__W);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
   {
     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -3969,7 +3970,7 @@
                                                (__v2di)_mm_setzero_si128());
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask_cvtepi32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
   {
     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -3977,7 +3978,7 @@
                                                (__v4di)__W);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_maskz_cvtepi32_epi64(__mmask8 __U, __m128i __X)
   {
     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -3985,7 +3986,7 @@
                                                (__v4di)_mm256_setzero_si256());
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask_cvtepi16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -3993,7 +3994,7 @@
                                                (__v4si)__W);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_maskz_cvtepi16_epi32(__mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4001,7 +4002,7 @@
                                                (__v4si)_mm_setzero_si128());
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask_cvtepi16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4009,7 +4010,7 @@
                                                (__v8si)__W);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_maskz_cvtepi16_epi32 (__mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4017,7 +4018,7 @@
                                                (__v8si)_mm256_setzero_si256());
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask_cvtepi16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4025,7 +4026,7 @@
                                                (__v2di)__W);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4033,7 +4034,7 @@
                                                (__v2di)_mm_setzero_si128());
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask_cvtepi16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4041,7 +4042,7 @@
                                                (__v4di)__W);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_maskz_cvtepi16_epi64(__mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4050,7 +4051,7 @@
   }
 
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask_cvtepu8_epi32(__m128i __W, __mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4058,7 +4059,7 @@
                                                (__v4si)__W);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4066,7 +4067,7 @@
                                                (__v4si)_mm_setzero_si128());
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask_cvtepu8_epi32(__m256i __W, __mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4074,7 +4075,7 @@
                                                (__v8si)__W);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_maskz_cvtepu8_epi32(__mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4082,7 +4083,7 @@
                                                (__v8si)_mm256_setzero_si256());
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask_cvtepu8_epi64(__m128i __W, __mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4090,7 +4091,7 @@
                                                (__v2di)__W);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_maskz_cvtepu8_epi64(__mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4098,7 +4099,7 @@
                                                (__v2di)_mm_setzero_si128());
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask_cvtepu8_epi64(__m256i __W, __mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4106,7 +4107,7 @@
                                                (__v4di)__W);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_maskz_cvtepu8_epi64 (__mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4114,7 +4115,7 @@
                                                (__v4di)_mm256_setzero_si256());
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask_cvtepu32_epi64(__m128i __W, __mmask8 __U, __m128i __X)
   {
     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4122,7 +4123,7 @@
                                                (__v2di)__W);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
   {
     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4130,7 +4131,7 @@
                                                (__v2di)_mm_setzero_si128());
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask_cvtepu32_epi64(__m256i __W, __mmask8 __U, __m128i __X)
   {
     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4138,7 +4139,7 @@
                                                (__v4di)__W);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_maskz_cvtepu32_epi64(__mmask8 __U, __m128i __X)
   {
     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4146,7 +4147,7 @@
                                                (__v4di)_mm256_setzero_si256());
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask_cvtepu16_epi32(__m128i __W, __mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4154,7 +4155,7 @@
                                                (__v4si)__W);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4162,7 +4163,7 @@
                                                (__v4si)_mm_setzero_si128());
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask_cvtepu16_epi32(__m256i __W, __mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4170,7 +4171,7 @@
                                                (__v8si)__W);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_maskz_cvtepu16_epi32(__mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4178,7 +4179,7 @@
                                                (__v8si)_mm256_setzero_si256());
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_mask_cvtepu16_epi64(__m128i __W, __mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4186,7 +4187,7 @@
                                                (__v2di)__W);
   }
 
-  static __inline__ __m128i __DEFAULT_FN_ATTRS
+  static __inline__ __m128i __DEFAULT_FN_ATTRS128
   _mm_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
   {
     return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4194,7 +4195,7 @@
                                                (__v2di)_mm_setzero_si128());
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_mask_cvtepu16_epi64(__m256i __W, __mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4202,7 +4203,7 @@
                                                (__v4di)__W);
   }
 
-  static __inline__ __m256i __DEFAULT_FN_ATTRS
+  static __inline__ __m256i __DEFAULT_FN_ATTRS256
   _mm256_maskz_cvtepu16_epi64(__mmask8 __U, __m128i __A)
   {
     return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4263,13 +4264,13 @@
                                       (__v4di)_mm256_rol_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256())
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_rolv_epi32 (__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_prolvd128((__v4si)__A, (__v4si)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_rolv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -4277,7 +4278,7 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_rolv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -4285,13 +4286,13 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_rolv_epi32 (__m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_prolvd256((__v8si)__A, (__v8si)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_rolv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -4299,7 +4300,7 @@
                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_rolv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -4307,13 +4308,13 @@
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_rolv_epi64 (__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_prolvq128((__v2di)__A, (__v2di)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_rolv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -4321,7 +4322,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_rolv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -4329,13 +4330,13 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_rolv_epi64 (__m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_prolvq256((__v4di)__A, (__v4di)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_rolv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -4343,7 +4344,7 @@
                                             (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_rolv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -4403,7 +4404,7 @@
                                       (__v4di)_mm256_ror_epi64((a), (b)), \
                                       (__v4di)_mm256_setzero_si256())
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sll_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4411,7 +4412,7 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sll_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4419,7 +4420,7 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sll_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4427,7 +4428,7 @@
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sll_epi32(__mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4435,7 +4436,7 @@
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_slli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4443,7 +4444,7 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_slli_epi32(__mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4451,7 +4452,7 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_slli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4459,7 +4460,7 @@
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_slli_epi32(__mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4467,7 +4468,7 @@
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sll_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4475,7 +4476,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sll_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4483,7 +4484,7 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sll_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4491,7 +4492,7 @@
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sll_epi64(__mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4499,7 +4500,7 @@
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_slli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4507,7 +4508,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_slli_epi64(__mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4515,7 +4516,7 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_slli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4523,7 +4524,7 @@
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_slli_epi64(__mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4531,13 +4532,13 @@
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_rorv_epi32 (__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_prorvd128((__v4si)__A, (__v4si)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_rorv_epi32 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -4545,7 +4546,7 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_rorv_epi32 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -4553,13 +4554,13 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_rorv_epi32 (__m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_prorvd256((__v8si)__A, (__v8si)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_rorv_epi32 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -4567,7 +4568,7 @@
                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_rorv_epi32 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -4575,13 +4576,13 @@
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_rorv_epi64 (__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_prorvq128((__v2di)__A, (__v2di)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_rorv_epi64 (__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -4589,7 +4590,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_rorv_epi64 (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128(__U,
@@ -4597,13 +4598,13 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_rorv_epi64 (__m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_prorvq256((__v4di)__A, (__v4di)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_rorv_epi64 (__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -4611,7 +4612,7 @@
                                             (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_rorv_epi64 (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256(__U,
@@ -4619,7 +4620,7 @@
                                             (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sllv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4627,7 +4628,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sllv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4635,7 +4636,7 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sllv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4643,7 +4644,7 @@
                                             (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sllv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4651,7 +4652,7 @@
                                             (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sllv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4659,7 +4660,7 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sllv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4667,7 +4668,7 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sllv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4675,7 +4676,7 @@
                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sllv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4683,7 +4684,7 @@
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srlv_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4691,7 +4692,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srlv_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4699,7 +4700,7 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srlv_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4707,7 +4708,7 @@
                                             (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srlv_epi64(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4715,7 +4716,7 @@
                                             (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srlv_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4723,7 +4724,7 @@
                                             (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srlv_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4731,7 +4732,7 @@
                                             (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srlv_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4739,7 +4740,7 @@
                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srlv_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4747,7 +4748,7 @@
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srl_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4755,7 +4756,7 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srl_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4763,7 +4764,7 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srl_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4771,7 +4772,7 @@
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srl_epi32(__mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4779,7 +4780,7 @@
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srli_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4787,7 +4788,7 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srli_epi32(__mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4795,7 +4796,7 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srli_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4803,7 +4804,7 @@
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srli_epi32(__mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4811,7 +4812,7 @@
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srl_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4819,7 +4820,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srl_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4827,7 +4828,7 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srl_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4835,7 +4836,7 @@
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srl_epi64(__mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4843,7 +4844,7 @@
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srli_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4851,7 +4852,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srli_epi64(__mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4859,7 +4860,7 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srli_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4867,7 +4868,7 @@
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srli_epi64(__mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4875,7 +4876,7 @@
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srav_epi32(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4883,7 +4884,7 @@
                                             (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srav_epi32(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -4891,7 +4892,7 @@
                                             (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srav_epi32(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4899,7 +4900,7 @@
                                             (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srav_epi32(__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -4907,13 +4908,13 @@
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_srav_epi64(__m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_psravq128((__v2di)__X, (__v2di)__Y);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srav_epi64(__m128i __W, __mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4921,7 +4922,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srav_epi64(__mmask8 __U, __m128i __X, __m128i __Y)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -4929,13 +4930,13 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srav_epi64(__m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_psravq256((__v4di)__X, (__v4di) __Y);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srav_epi64(__m256i __W, __mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4943,7 +4944,7 @@
                                              (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srav_epi64 (__mmask8 __U, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -4951,7 +4952,7 @@
                                              (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_mov_epi32 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
@@ -4959,7 +4960,7 @@
                  (__v4si) __W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_mov_epi32 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectd_128 ((__mmask8) __U,
@@ -4968,7 +4969,7 @@
 }
 
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_mov_epi32 (__m256i __W, __mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
@@ -4976,7 +4977,7 @@
                  (__v8si) __W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_mov_epi32 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectd_256 ((__mmask8) __U,
@@ -4984,7 +4985,7 @@
                  (__v8si) _mm256_setzero_si256 ());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_load_epi32 (__m128i __W, __mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
@@ -4993,7 +4994,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_load_epi32 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_movdqa32load128_mask ((__v4si *) __P,
@@ -5003,7 +5004,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_load_epi32 (__m256i __W, __mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
@@ -5012,7 +5013,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_load_epi32 (__mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_movdqa32load256_mask ((__v8si *) __P,
@@ -5022,7 +5023,7 @@
               __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_store_epi32 (void *__P, __mmask8 __U, __m128i __A)
 {
   __builtin_ia32_movdqa32store128_mask ((__v4si *) __P,
@@ -5030,7 +5031,7 @@
           (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_store_epi32 (void *__P, __mmask8 __U, __m256i __A)
 {
   __builtin_ia32_movdqa32store256_mask ((__v8si *) __P,
@@ -5038,7 +5039,7 @@
           (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_mov_epi64 (__m128i __W, __mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
@@ -5046,7 +5047,7 @@
                  (__v2di) __W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_mov_epi64 (__mmask8 __U, __m128i __A)
 {
   return (__m128i) __builtin_ia32_selectq_128 ((__mmask8) __U,
@@ -5054,7 +5055,7 @@
                  (__v2di) _mm_setzero_si128 ());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_mov_epi64 (__m256i __W, __mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
@@ -5062,7 +5063,7 @@
                  (__v4di) __W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_mov_epi64 (__mmask8 __U, __m256i __A)
 {
   return (__m256i) __builtin_ia32_selectq_256 ((__mmask8) __U,
@@ -5070,7 +5071,7 @@
                  (__v4di) _mm256_setzero_si256 ());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_load_epi64 (__m128i __W, __mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
@@ -5079,7 +5080,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_load_epi64 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_movdqa64load128_mask ((__v2di *) __P,
@@ -5089,7 +5090,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_load_epi64 (__m256i __W, __mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
@@ -5098,7 +5099,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_load_epi64 (__mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_movdqa64load256_mask ((__v4di *) __P,
@@ -5108,7 +5109,7 @@
               __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_store_epi64 (void *__P, __mmask8 __U, __m128i __A)
 {
   __builtin_ia32_movdqa64store128_mask ((__v2di *) __P,
@@ -5116,7 +5117,7 @@
           (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_store_epi64 (void *__P, __mmask8 __U, __m256i __A)
 {
   __builtin_ia32_movdqa64store256_mask ((__v4di *) __P,
@@ -5124,7 +5125,7 @@
           (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_movedup_pd (__m128d __W, __mmask8 __U, __m128d __A)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5132,7 +5133,7 @@
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_movedup_pd (__mmask8 __U, __m128d __A)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5140,7 +5141,7 @@
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_movedup_pd (__m256d __W, __mmask8 __U, __m256d __A)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5148,7 +5149,7 @@
                                               (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_movedup_pd (__mmask8 __U, __m256d __A)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5156,7 +5157,7 @@
                                               (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_set1_epi32(__m128i __O, __mmask8 __M, int __A)
 {
    return (__m128i)__builtin_ia32_selectd_128(__M,
@@ -5164,7 +5165,7 @@
                                               (__v4si)__O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_set1_epi32( __mmask8 __M, int __A)
 {
    return (__m128i)__builtin_ia32_selectd_128(__M,
@@ -5172,7 +5173,7 @@
                                               (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_set1_epi32(__m256i __O, __mmask8 __M, int __A)
 {
    return (__m256i)__builtin_ia32_selectd_256(__M,
@@ -5180,7 +5181,7 @@
                                               (__v8si)__O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_set1_epi32( __mmask8 __M, int __A)
 {
    return (__m256i)__builtin_ia32_selectd_256(__M,
@@ -5189,7 +5190,7 @@
 }
 
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_set1_epi64 (__m128i __O, __mmask8 __M, long long __A)
 {
   return (__m128i) __builtin_ia32_selectq_128(__M,
@@ -5197,7 +5198,7 @@
                                               (__v2di) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
   return (__m128i) __builtin_ia32_selectq_128(__M,
@@ -5205,7 +5206,7 @@
                                               (__v2di) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_set1_epi64 (__m256i __O, __mmask8 __M, long long __A)
 {
   return (__m256i) __builtin_ia32_selectq_256(__M,
@@ -5213,7 +5214,7 @@
                                               (__v4di) __O) ;
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_set1_epi64 (__mmask8 __M, long long __A)
 {
    return (__m256i) __builtin_ia32_selectq_256(__M,
@@ -5293,7 +5294,7 @@
                                              (__v8si)(__m256i)(C), (int)(imm), \
                                              (__mmask8)(U))
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_load_pd (__m128d __W, __mmask8 __U, void const *__P)
 {
   return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
@@ -5301,7 +5302,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_load_pd (__mmask8 __U, void const *__P)
 {
   return (__m128d) __builtin_ia32_loadapd128_mask ((__v2df *) __P,
@@ -5310,7 +5311,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_load_pd (__m256d __W, __mmask8 __U, void const *__P)
 {
   return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
@@ -5318,7 +5319,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_load_pd (__mmask8 __U, void const *__P)
 {
   return (__m256d) __builtin_ia32_loadapd256_mask ((__v4df *) __P,
@@ -5327,7 +5328,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_load_ps (__m128 __W, __mmask8 __U, void const *__P)
 {
   return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
@@ -5335,7 +5336,7 @@
               (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_load_ps (__mmask8 __U, void const *__P)
 {
   return (__m128) __builtin_ia32_loadaps128_mask ((__v4sf *) __P,
@@ -5344,7 +5345,7 @@
               (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_load_ps (__m256 __W, __mmask8 __U, void const *__P)
 {
   return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
@@ -5352,7 +5353,7 @@
               (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_load_ps (__mmask8 __U, void const *__P)
 {
   return (__m256) __builtin_ia32_loadaps256_mask ((__v8sf *) __P,
@@ -5361,7 +5362,7 @@
               (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_loadu_epi64 (__m128i __W, __mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
@@ -5369,7 +5370,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddqudi128_mask ((__v2di *) __P,
@@ -5378,7 +5379,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_loadu_epi64 (__m256i __W, __mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
@@ -5386,7 +5387,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_loadu_epi64 (__mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_loaddqudi256_mask ((__v4di *) __P,
@@ -5395,7 +5396,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_loadu_epi32 (__m128i __W, __mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
@@ -5403,7 +5404,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_loaddqusi128_mask ((__v4si *) __P,
@@ -5412,7 +5413,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_loadu_epi32 (__m256i __W, __mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
@@ -5420,7 +5421,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_loadu_epi32 (__mmask8 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_loaddqusi256_mask ((__v8si *) __P,
@@ -5429,7 +5430,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_loadu_pd (__m128d __W, __mmask8 __U, void const *__P)
 {
   return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
@@ -5437,7 +5438,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_loadu_pd (__mmask8 __U, void const *__P)
 {
   return (__m128d) __builtin_ia32_loadupd128_mask ((__v2df *) __P,
@@ -5446,7 +5447,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_loadu_pd (__m256d __W, __mmask8 __U, void const *__P)
 {
   return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
@@ -5454,7 +5455,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_loadu_pd (__mmask8 __U, void const *__P)
 {
   return (__m256d) __builtin_ia32_loadupd256_mask ((__v4df *) __P,
@@ -5463,7 +5464,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_loadu_ps (__m128 __W, __mmask8 __U, void const *__P)
 {
   return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
@@ -5471,7 +5472,7 @@
               (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_loadu_ps (__mmask8 __U, void const *__P)
 {
   return (__m128) __builtin_ia32_loadups128_mask ((__v4sf *) __P,
@@ -5480,7 +5481,7 @@
               (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_loadu_ps (__m256 __W, __mmask8 __U, void const *__P)
 {
   return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
@@ -5488,7 +5489,7 @@
               (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_loadu_ps (__mmask8 __U, void const *__P)
 {
   return (__m256) __builtin_ia32_loadups256_mask ((__v8sf *) __P,
@@ -5497,7 +5498,7 @@
               (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_store_pd (void *__P, __mmask8 __U, __m128d __A)
 {
   __builtin_ia32_storeapd128_mask ((__v2df *) __P,
@@ -5505,7 +5506,7 @@
            (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_store_pd (void *__P, __mmask8 __U, __m256d __A)
 {
   __builtin_ia32_storeapd256_mask ((__v4df *) __P,
@@ -5513,7 +5514,7 @@
            (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_store_ps (void *__P, __mmask8 __U, __m128 __A)
 {
   __builtin_ia32_storeaps128_mask ((__v4sf *) __P,
@@ -5521,7 +5522,7 @@
            (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_store_ps (void *__P, __mmask8 __U, __m256 __A)
 {
   __builtin_ia32_storeaps256_mask ((__v8sf *) __P,
@@ -5529,7 +5530,7 @@
            (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_storeu_epi64 (void *__P, __mmask8 __U, __m128i __A)
 {
   __builtin_ia32_storedqudi128_mask ((__v2di *) __P,
@@ -5537,7 +5538,7 @@
              (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_storeu_epi64 (void *__P, __mmask8 __U, __m256i __A)
 {
   __builtin_ia32_storedqudi256_mask ((__v4di *) __P,
@@ -5545,7 +5546,7 @@
              (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_storeu_epi32 (void *__P, __mmask8 __U, __m128i __A)
 {
   __builtin_ia32_storedqusi128_mask ((__v4si *) __P,
@@ -5553,7 +5554,7 @@
              (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_storeu_epi32 (void *__P, __mmask8 __U, __m256i __A)
 {
   __builtin_ia32_storedqusi256_mask ((__v8si *) __P,
@@ -5561,7 +5562,7 @@
              (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_storeu_pd (void *__P, __mmask8 __U, __m128d __A)
 {
   __builtin_ia32_storeupd128_mask ((__v2df *) __P,
@@ -5569,7 +5570,7 @@
            (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_storeu_pd (void *__P, __mmask8 __U, __m256d __A)
 {
   __builtin_ia32_storeupd256_mask ((__v4df *) __P,
@@ -5577,7 +5578,7 @@
            (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_storeu_ps (void *__P, __mmask8 __U, __m128 __A)
 {
   __builtin_ia32_storeups128_mask ((__v4sf *) __P,
@@ -5585,7 +5586,7 @@
            (__mmask8) __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_storeu_ps (void *__P, __mmask8 __U, __m256 __A)
 {
   __builtin_ia32_storeups256_mask ((__v8sf *) __P,
@@ -5594,7 +5595,7 @@
 }
 
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_unpackhi_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5602,7 +5603,7 @@
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_unpackhi_pd(__mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5610,7 +5611,7 @@
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_unpackhi_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5618,7 +5619,7 @@
                                            (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_unpackhi_pd(__mmask8 __U, __m256d __A, __m256d __B)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5626,7 +5627,7 @@
                                            (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_unpackhi_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5634,7 +5635,7 @@
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_unpackhi_ps(__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5642,7 +5643,7 @@
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_unpackhi_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -5650,7 +5651,7 @@
                                            (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_unpackhi_ps(__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -5658,7 +5659,7 @@
                                            (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_unpacklo_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5666,7 +5667,7 @@
                                               (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_unpacklo_pd(__mmask8 __U, __m128d __A, __m128d __B)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5674,7 +5675,7 @@
                                               (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_unpacklo_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256d __B)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5682,7 +5683,7 @@
                                            (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_unpacklo_pd(__mmask8 __U, __m256d __A, __m256d __B)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5690,7 +5691,7 @@
                                            (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_unpacklo_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5698,7 +5699,7 @@
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_unpacklo_ps(__mmask8 __U, __m128 __A, __m128 __B)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5706,7 +5707,7 @@
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_unpacklo_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -5714,7 +5715,7 @@
                                            (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_unpacklo_ps(__mmask8 __U, __m256 __A, __m256 __B)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -5722,7 +5723,7 @@
                                            (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_rcp14_pd (__m128d __A)
 {
   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
@@ -5731,7 +5732,7 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_rcp14_pd (__m128d __W, __mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
@@ -5739,7 +5740,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_rcp14_pd (__mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_rcp14pd128_mask ((__v2df) __A,
@@ -5748,7 +5749,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_rcp14_pd (__m256d __A)
 {
   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
@@ -5757,7 +5758,7 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_rcp14_pd (__m256d __W, __mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
@@ -5765,7 +5766,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_rcp14_pd (__mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_rcp14pd256_mask ((__v4df) __A,
@@ -5774,7 +5775,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_rcp14_ps (__m128 __A)
 {
   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
@@ -5783,7 +5784,7 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_rcp14_ps (__m128 __W, __mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
@@ -5791,7 +5792,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_rcp14_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_rcp14ps128_mask ((__v4sf) __A,
@@ -5800,7 +5801,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_rcp14_ps (__m256 __A)
 {
   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
@@ -5809,7 +5810,7 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_rcp14_ps (__m256 __W, __mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
@@ -5817,7 +5818,7 @@
                (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_rcp14_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_rcp14ps256_mask ((__v8sf) __A,
@@ -5866,7 +5867,7 @@
                                       (__v8sf)_mm256_permute_ps((X), (C)), \
                                       (__v8sf)_mm256_setzero_ps())
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_permutevar_pd(__m128d __W, __mmask8 __U, __m128d __A, __m128i __C)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5874,7 +5875,7 @@
                                             (__v2df)__W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_permutevar_pd(__mmask8 __U, __m128d __A, __m128i __C)
 {
   return (__m128d)__builtin_ia32_selectpd_128((__mmask8)__U,
@@ -5882,7 +5883,7 @@
                                             (__v2df)_mm_setzero_pd());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_permutevar_pd(__m256d __W, __mmask8 __U, __m256d __A, __m256i __C)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5890,7 +5891,7 @@
                                          (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_permutevar_pd(__mmask8 __U, __m256d __A, __m256i __C)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -5898,7 +5899,7 @@
                                          (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_permutevar_ps(__m128 __W, __mmask8 __U, __m128 __A, __m128i __C)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5906,7 +5907,7 @@
                                             (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_permutevar_ps(__mmask8 __U, __m128 __A, __m128i __C)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -5914,7 +5915,7 @@
                                             (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_permutevar_ps(__m256 __W, __mmask8 __U, __m256 __A, __m256i __C)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -5922,7 +5923,7 @@
                                           (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_permutevar_ps(__mmask8 __U, __m256 __A, __m256i __C)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -5930,115 +5931,115 @@
                                           (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_test_epi32_mask (__m128i __A, __m128i __B)
 {
   return _mm_cmpneq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_mask_test_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return _mm_mask_cmpneq_epi32_mask (__U, _mm_and_si128 (__A, __B),
                                      _mm_setzero_si128());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
 _mm256_test_epi32_mask (__m256i __A, __m256i __B)
 {
   return _mm256_cmpneq_epi32_mask (_mm256_and_si256 (__A, __B),
                                    _mm256_setzero_si256());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
 _mm256_mask_test_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return _mm256_mask_cmpneq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
                                         _mm256_setzero_si256());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_test_epi64_mask (__m128i __A, __m128i __B)
 {
   return _mm_cmpneq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_mask_test_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return _mm_mask_cmpneq_epi64_mask (__U, _mm_and_si128 (__A, __B),
                                      _mm_setzero_si128());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
 _mm256_test_epi64_mask (__m256i __A, __m256i __B)
 {
   return _mm256_cmpneq_epi64_mask (_mm256_and_si256 (__A, __B),
                                    _mm256_setzero_si256());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
 _mm256_mask_test_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return _mm256_mask_cmpneq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
                                         _mm256_setzero_si256());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_testn_epi32_mask (__m128i __A, __m128i __B)
 {
   return _mm_cmpeq_epi32_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_mask_testn_epi32_mask (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return _mm_mask_cmpeq_epi32_mask (__U, _mm_and_si128 (__A, __B),
                                     _mm_setzero_si128());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
 _mm256_testn_epi32_mask (__m256i __A, __m256i __B)
 {
   return _mm256_cmpeq_epi32_mask (_mm256_and_si256 (__A, __B),
                                   _mm256_setzero_si256());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
 _mm256_mask_testn_epi32_mask (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return _mm256_mask_cmpeq_epi32_mask (__U, _mm256_and_si256 (__A, __B),
                                        _mm256_setzero_si256());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_testn_epi64_mask (__m128i __A, __m128i __B)
 {
   return _mm_cmpeq_epi64_mask (_mm_and_si128 (__A, __B), _mm_setzero_si128());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS128
 _mm_mask_testn_epi64_mask (__mmask8 __U, __m128i __A, __m128i __B)
 {
   return _mm_mask_cmpeq_epi64_mask (__U, _mm_and_si128 (__A, __B),
                                     _mm_setzero_si128());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
 _mm256_testn_epi64_mask (__m256i __A, __m256i __B)
 {
   return _mm256_cmpeq_epi64_mask (_mm256_and_si256 (__A, __B),
                                   _mm256_setzero_si256());
 }
 
-static __inline__ __mmask8 __DEFAULT_FN_ATTRS
+static __inline__ __mmask8 __DEFAULT_FN_ATTRS256
 _mm256_mask_testn_epi64_mask (__mmask8 __U, __m256i __A, __m256i __B)
 {
   return _mm256_mask_cmpeq_epi64_mask (__U, _mm256_and_si256 (__A, __B),
                                        _mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_unpackhi_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -6046,7 +6047,7 @@
                                            (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_unpackhi_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -6054,7 +6055,7 @@
                                            (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_unpackhi_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -6062,7 +6063,7 @@
                                         (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_unpackhi_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -6070,7 +6071,7 @@
                                         (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_unpackhi_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -6078,7 +6079,7 @@
                                            (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_unpackhi_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -6086,7 +6087,7 @@
                                            (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_unpackhi_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -6094,7 +6095,7 @@
                                         (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_unpackhi_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -6102,7 +6103,7 @@
                                         (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_unpacklo_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -6110,7 +6111,7 @@
                                            (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_unpacklo_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -6118,7 +6119,7 @@
                                            (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_unpacklo_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -6126,7 +6127,7 @@
                                         (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_unpacklo_epi32(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -6134,7 +6135,7 @@
                                         (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_unpacklo_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -6142,7 +6143,7 @@
                                            (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_unpacklo_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U,
@@ -6150,7 +6151,7 @@
                                            (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_unpacklo_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -6158,7 +6159,7 @@
                                         (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_unpacklo_epi64(__mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U,
@@ -6166,7 +6167,7 @@
                                         (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sra_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -6174,7 +6175,7 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sra_epi32(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -6182,7 +6183,7 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sra_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -6190,7 +6191,7 @@
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sra_epi32(__mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -6198,7 +6199,7 @@
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srai_epi32(__m128i __W, __mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -6206,7 +6207,7 @@
                                              (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srai_epi32(__mmask8 __U, __m128i __A, int __B)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__U,
@@ -6214,7 +6215,7 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srai_epi32(__m256i __W, __mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -6222,7 +6223,7 @@
                                              (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srai_epi32(__mmask8 __U, __m256i __A, int __B)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__U,
@@ -6230,13 +6231,13 @@
                                              (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_sra_epi64(__m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_psraq128((__v2di)__A, (__v2di)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_sra_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
@@ -6244,7 +6245,7 @@
                                              (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_sra_epi64(__mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
@@ -6252,13 +6253,13 @@
                                              (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_sra_epi64(__m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_psraq256((__v4di) __A, (__v2di) __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_sra_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
@@ -6266,7 +6267,7 @@
                                            (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_sra_epi64(__mmask8 __U, __m256i __A, __m128i __B)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
@@ -6274,13 +6275,13 @@
                                            (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_srai_epi64(__m128i __A, int __imm)
 {
   return (__m128i)__builtin_ia32_psraqi128((__v2di)__A, __imm);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_srai_epi64(__m128i __W, __mmask8 __U, __m128i __A, int __imm)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
@@ -6288,7 +6289,7 @@
                                            (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_srai_epi64(__mmask8 __U, __m128i __A, int __imm)
 {
   return (__m128i)__builtin_ia32_selectq_128((__mmask8)__U, \
@@ -6296,13 +6297,13 @@
                                            (__v2di)_mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_srai_epi64(__m256i __A, int __imm)
 {
   return (__m256i)__builtin_ia32_psraqi256((__v4di)__A, __imm);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_srai_epi64(__m256i __W, __mmask8 __U, __m256i __A, int __imm)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
@@ -6310,7 +6311,7 @@
                                         (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_srai_epi64(__mmask8 __U, __m256i __A, int __imm)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__U, \
@@ -6489,7 +6490,7 @@
                                       (__v8sf)_mm256_shuffle_ps((A), (B), (M)), \
                                       (__v8sf)_mm256_setzero_ps())
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_rsqrt14_pd (__m128d __A)
 {
   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
@@ -6498,7 +6499,7 @@
                  (__mmask8) -1);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_rsqrt14_pd (__m128d __W, __mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
@@ -6506,7 +6507,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_rsqrt14_pd (__mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_rsqrt14pd128_mask ((__v2df) __A,
@@ -6515,7 +6516,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_rsqrt14_pd (__m256d __A)
 {
   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
@@ -6524,7 +6525,7 @@
                  (__mmask8) -1);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_rsqrt14_pd (__m256d __W, __mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
@@ -6532,7 +6533,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_rsqrt14_pd (__mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_rsqrt14pd256_mask ((__v4df) __A,
@@ -6541,7 +6542,7 @@
                  (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_rsqrt14_ps (__m128 __A)
 {
   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
@@ -6550,7 +6551,7 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_rsqrt14_ps (__m128 __W, __mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
@@ -6558,7 +6559,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_rsqrt14_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_rsqrt14ps128_mask ((__v4sf) __A,
@@ -6567,7 +6568,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_rsqrt14_ps (__m256 __A)
 {
   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
@@ -6576,7 +6577,7 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_rsqrt14_ps (__m256 __W, __mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
@@ -6584,7 +6585,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_rsqrt14_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_rsqrt14ps256_mask ((__v8sf) __A,
@@ -6593,14 +6594,14 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_broadcast_f32x4(__m128 __A)
 {
   return (__m256)__builtin_shufflevector((__v4sf)__A, (__v4sf)__A,
                                          0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_broadcast_f32x4(__m256 __O, __mmask8 __M, __m128 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
@@ -6608,7 +6609,7 @@
                                             (__v8sf)__O);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_broadcast_f32x4 (__mmask8 __M, __m128 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__M,
@@ -6616,14 +6617,14 @@
                                             (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_broadcast_i32x4(__m128i __A)
 {
   return (__m256i)__builtin_shufflevector((__v4si)__A, (__v4si)__A,
                                           0, 1, 2, 3, 0, 1, 2, 3);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_broadcast_i32x4(__m256i __O, __mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
@@ -6631,7 +6632,7 @@
                                             (__v8si)__O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_broadcast_i32x4(__mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
@@ -6639,7 +6640,7 @@
                                             (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_broadcastsd_pd (__m256d __O, __mmask8 __M, __m128d __A)
 {
   return (__m256d)__builtin_ia32_selectpd_256(__M,
@@ -6647,7 +6648,7 @@
                                               (__v4df) __O);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_broadcastsd_pd (__mmask8 __M, __m128d __A)
 {
   return (__m256d)__builtin_ia32_selectpd_256(__M,
@@ -6655,7 +6656,7 @@
                                               (__v4df) _mm256_setzero_pd());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_broadcastss_ps (__m128 __O, __mmask8 __M, __m128 __A)
 {
   return (__m128)__builtin_ia32_selectps_128(__M,
@@ -6663,7 +6664,7 @@
                                              (__v4sf) __O);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
 {
   return (__m128)__builtin_ia32_selectps_128(__M,
@@ -6671,7 +6672,7 @@
                                              (__v4sf) _mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_broadcastss_ps (__m256 __O, __mmask8 __M, __m128 __A)
 {
   return (__m256)__builtin_ia32_selectps_256(__M,
@@ -6679,7 +6680,7 @@
                                              (__v8sf) __O);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_broadcastss_ps (__mmask8 __M, __m128 __A)
 {
   return (__m256)__builtin_ia32_selectps_256(__M,
@@ -6687,7 +6688,7 @@
                                              (__v8sf) _mm256_setzero_ps());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_broadcastd_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectd_128(__M,
@@ -6695,7 +6696,7 @@
                                              (__v4si) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectd_128(__M,
@@ -6703,7 +6704,7 @@
                                              (__v4si) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_broadcastd_epi32 (__m256i __O, __mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256(__M,
@@ -6711,7 +6712,7 @@
                                              (__v8si) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_broadcastd_epi32 (__mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectd_256(__M,
@@ -6719,7 +6720,7 @@
                                              (__v8si) _mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_broadcastq_epi64 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectq_128(__M,
@@ -6727,7 +6728,7 @@
                                              (__v2di) __O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
 {
   return (__m128i)__builtin_ia32_selectq_128(__M,
@@ -6735,7 +6736,7 @@
                                              (__v2di) _mm_setzero_si128());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_broadcastq_epi64 (__m256i __O, __mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectq_256(__M,
@@ -6743,7 +6744,7 @@
                                              (__v4di) __O);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_broadcastq_epi64 (__mmask8 __M, __m128i __A)
 {
   return (__m256i)__builtin_ia32_selectq_256(__M,
@@ -6751,7 +6752,7 @@
                                              (__v4di) _mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtsepi32_epi8 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
@@ -6759,14 +6760,14 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
                (__v16qi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtsepi32_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdb128_mask ((__v4si) __A,
@@ -6774,13 +6775,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovsdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm256_cvtsepi32_epi8 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
@@ -6788,14 +6789,14 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtsepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
                (__v16qi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtsepi32_epi8 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdb256_mask ((__v8si) __A,
@@ -6803,13 +6804,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm256_mask_cvtsepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovsdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtsepi32_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
@@ -6817,7 +6818,7 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
@@ -6825,7 +6826,7 @@
                __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtsepi32_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdw128_mask ((__v4si) __A,
@@ -6833,13 +6834,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovsdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtsepi32_epi16 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
@@ -6847,14 +6848,14 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtsepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
                (__v8hi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtsepi32_epi16 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsdw256_mask ((__v8si) __A,
@@ -6862,13 +6863,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtsepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovsdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtsepi64_epi8 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
@@ -6876,14 +6877,14 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
                (__v16qi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtsepi64_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqb128_mask ((__v2di) __A,
@@ -6891,13 +6892,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovsqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtsepi64_epi8 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
@@ -6905,14 +6906,14 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtsepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
                (__v16qi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtsepi64_epi8 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqb256_mask ((__v4di) __A,
@@ -6920,13 +6921,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtsepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovsqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtsepi64_epi32 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
@@ -6934,14 +6935,14 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
                (__v4si) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtsepi64_epi32 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqd128_mask ((__v2di) __A,
@@ -6949,13 +6950,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovsqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtsepi64_epi32 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
@@ -6963,7 +6964,7 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtsepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
@@ -6971,7 +6972,7 @@
                __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtsepi64_epi32 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqd256_mask ((__v4di) __A,
@@ -6979,13 +6980,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtsepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovsqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtsepi64_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
@@ -6993,14 +6994,14 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
                (__v8hi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtsepi64_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqw128_mask ((__v2di) __A,
@@ -7008,13 +7009,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovsqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtsepi64_epi16 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
@@ -7022,14 +7023,14 @@
                (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtsepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
                (__v8hi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtsepi64_epi16 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovsqw256_mask ((__v4di) __A,
@@ -7037,13 +7038,13 @@
                __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtsepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovsqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtusepi32_epi8 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
@@ -7051,7 +7052,7 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
@@ -7059,7 +7060,7 @@
                 __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtusepi32_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdb128_mask ((__v4si) __A,
@@ -7067,13 +7068,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovusdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtusepi32_epi8 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
@@ -7081,7 +7082,7 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtusepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
@@ -7089,7 +7090,7 @@
                 __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtusepi32_epi8 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdb256_mask ((__v8si) __A,
@@ -7097,13 +7098,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtusepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovusdb256mem_mask ((__v16qi*) __P, (__v8si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtusepi32_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
@@ -7111,14 +7112,14 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
                 (__v8hi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtusepi32_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdw128_mask ((__v4si) __A,
@@ -7126,13 +7127,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovusdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtusepi32_epi16 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
@@ -7140,14 +7141,14 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtusepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
                 (__v8hi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtusepi32_epi16 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusdw256_mask ((__v8si) __A,
@@ -7155,13 +7156,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtusepi32_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovusdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtusepi64_epi8 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
@@ -7169,7 +7170,7 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
@@ -7177,7 +7178,7 @@
                 __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtusepi64_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqb128_mask ((__v2di) __A,
@@ -7185,13 +7186,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovusqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtusepi64_epi8 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
@@ -7199,7 +7200,7 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtusepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
@@ -7207,7 +7208,7 @@
                 __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtusepi64_epi8 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqb256_mask ((__v4di) __A,
@@ -7215,13 +7216,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtusepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovusqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtusepi64_epi32 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
@@ -7229,14 +7230,14 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
                 (__v4si) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtusepi64_epi32 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqd128_mask ((__v2di) __A,
@@ -7244,13 +7245,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovusqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtusepi64_epi32 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
@@ -7258,14 +7259,14 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtusepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
                 (__v4si) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtusepi64_epi32 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqd256_mask ((__v4di) __A,
@@ -7273,13 +7274,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtusepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovusqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtusepi64_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
@@ -7287,14 +7288,14 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
                 (__v8hi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtusepi64_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqw128_mask ((__v2di) __A,
@@ -7302,13 +7303,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovusqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtusepi64_epi16 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
@@ -7316,14 +7317,14 @@
                 (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtusepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
                 (__v8hi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtusepi64_epi16 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovusqw256_mask ((__v4di) __A,
@@ -7331,13 +7332,13 @@
                 __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtusepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovusqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtepi32_epi8 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
@@ -7345,14 +7346,14 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
               (__v16qi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepi32_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovdb128_mask ((__v4si) __A,
@@ -7361,13 +7362,13 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovdb128mem_mask ((__v16qi *) __P, (__v4si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi32_epi8 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
@@ -7375,14 +7376,14 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi32_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
               (__v16qi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepi32_epi8 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovdb256_mask ((__v8si) __A,
@@ -7390,13 +7391,13 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi32_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovdb256mem_mask ((__v16qi *) __P, (__v8si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtepi32_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
@@ -7404,14 +7405,14 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
               (__v8hi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepi32_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovdw128_mask ((__v4si) __A,
@@ -7419,26 +7420,26 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi32_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovdw128mem_mask ((__v8hi *) __P, (__v4si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi32_epi16 (__m256i __A)
 {
   return (__m128i)__builtin_convertvector((__v8si)__A, __v8hi);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi32_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
               (__v8hi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepi32_epi16 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovdw256_mask ((__v8si) __A,
@@ -7446,13 +7447,13 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi32_storeu_epi16 (void *  __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovdw256mem_mask ((__v8hi *) __P, (__v8si) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtepi64_epi8 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
@@ -7460,14 +7461,14 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
               (__v16qi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepi64_epi8 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqb128_mask ((__v2di) __A,
@@ -7475,13 +7476,13 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovqb128mem_mask ((__v16qi *) __P, (__v2di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi64_epi8 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
@@ -7489,14 +7490,14 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi64_epi8 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
               (__v16qi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepi64_epi8 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovqb256_mask ((__v4di) __A,
@@ -7504,13 +7505,13 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi64_storeu_epi8 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovqb256mem_mask ((__v16qi *) __P, (__v4di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtepi64_epi32 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
@@ -7518,14 +7519,14 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
               (__v4si) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepi64_epi32 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqd128_mask ((__v2di) __A,
@@ -7533,19 +7534,19 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovqd128mem_mask ((__v4si *) __P, (__v2di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi64_epi32 (__m256i __A)
 {
   return (__m128i)__builtin_convertvector((__v4di)__A, __v4si);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi64_epi32 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
@@ -7553,7 +7554,7 @@
                                              (__v4si)__O);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepi64_epi32 (__mmask8 __M, __m256i __A)
 {
   return (__m128i)__builtin_ia32_selectd_128((__mmask8)__M,
@@ -7561,13 +7562,13 @@
                                              (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi64_storeu_epi32 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovqd256mem_mask ((__v4si *) __P, (__v4di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_cvtepi64_epi16 (__m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
@@ -7575,7 +7576,7 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
@@ -7583,7 +7584,7 @@
               __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtepi64_epi16 (__mmask8 __M, __m128i __A)
 {
   return (__m128i) __builtin_ia32_pmovqw128_mask ((__v2di) __A,
@@ -7591,13 +7592,13 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m128i __A)
 {
   __builtin_ia32_pmovqw128mem_mask ((__v8hi *) __P, (__v2di) __A, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_cvtepi64_epi16 (__m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
@@ -7605,14 +7606,14 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi64_epi16 (__m128i __O, __mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
               (__v8hi) __O, __M);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtepi64_epi16 (__mmask8 __M, __m256i __A)
 {
   return (__m128i) __builtin_ia32_pmovqw256_mask ((__v4di) __A,
@@ -7620,7 +7621,7 @@
               __M);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtepi64_storeu_epi16 (void * __P, __mmask8 __M, __m256i __A)
 {
   __builtin_ia32_pmovqw256mem_mask ((__v8hi *) __P, (__v4di) __A, __M);
@@ -7884,13 +7885,13 @@
                                       (__v4di)_mm256_permutex_epi64((X), (C)), \
                                       (__v4di)_mm256_setzero_si256())
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_permutexvar_pd (__m256i __X, __m256d __Y)
 {
   return (__m256d)__builtin_ia32_permvardf256((__v4df)__Y, (__v4di)__X);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_permutexvar_pd (__m256d __W, __mmask8 __U, __m256i __X,
           __m256d __Y)
 {
@@ -7899,7 +7900,7 @@
                                         (__v4df)__W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_permutexvar_pd (__mmask8 __U, __m256i __X, __m256d __Y)
 {
   return (__m256d)__builtin_ia32_selectpd_256((__mmask8)__U,
@@ -7907,13 +7908,13 @@
                                         (__v4df)_mm256_setzero_pd());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_permutexvar_epi64 ( __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_permvardi256((__v4di) __Y, (__v4di) __X);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_permutexvar_epi64 (__mmask8 __M, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectq_256((__mmask8)__M,
@@ -7921,7 +7922,7 @@
                                      (__v4di)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_permutexvar_epi64 (__m256i __W, __mmask8 __M, __m256i __X,
              __m256i __Y)
 {
@@ -7932,7 +7933,7 @@
 
 #define _mm256_permutexvar_ps(A, B) _mm256_permutevar8x32_ps((B), (A))
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_permutexvar_ps(__m256 __W, __mmask8 __U, __m256i __X, __m256 __Y)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -7940,7 +7941,7 @@
                                         (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_permutexvar_ps(__mmask8 __U, __m256i __X, __m256 __Y)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -7950,7 +7951,7 @@
 
 #define _mm256_permutexvar_epi32(A, B) _mm256_permutevar8x32_epi32((B), (A))
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_permutexvar_epi32(__m256i __W, __mmask8 __M, __m256i __X,
                               __m256i __Y)
 {
@@ -7959,7 +7960,7 @@
                                      (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_permutexvar_epi32(__mmask8 __M, __m256i __X, __m256i __Y)
 {
   return (__m256i)__builtin_ia32_selectd_256((__mmask8)__M,
@@ -8023,7 +8024,7 @@
                                  (__v4di)_mm256_alignr_epi64((A), (B), (imm)), \
                                  (__v4di)_mm256_setzero_si256())
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_movehdup_ps (__m128 __W, __mmask8 __U, __m128 __A)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -8031,7 +8032,7 @@
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_movehdup_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -8039,7 +8040,7 @@
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_movehdup_ps (__m256 __W, __mmask8 __U, __m256 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -8047,7 +8048,7 @@
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_movehdup_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -8055,7 +8056,7 @@
                                              (__v8sf)_mm256_setzero_ps());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_moveldup_ps (__m128 __W, __mmask8 __U, __m128 __A)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -8063,7 +8064,7 @@
                                              (__v4sf)__W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_moveldup_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128)__builtin_ia32_selectps_128((__mmask8)__U,
@@ -8071,7 +8072,7 @@
                                              (__v4sf)_mm_setzero_ps());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_moveldup_ps (__m256 __W, __mmask8 __U, __m256 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -8079,7 +8080,7 @@
                                              (__v8sf)__W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_moveldup_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256)__builtin_ia32_selectps_256((__mmask8)__U,
@@ -8107,7 +8108,7 @@
                                       (__v4si)_mm_shuffle_epi32((A), (I)), \
                                       (__v4si)_mm_setzero_si128())
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_mask_mov_pd (__m128d __W, __mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
@@ -8115,7 +8116,7 @@
               (__v2df) __W);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maskz_mov_pd (__mmask8 __U, __m128d __A)
 {
   return (__m128d) __builtin_ia32_selectpd_128 ((__mmask8) __U,
@@ -8123,7 +8124,7 @@
               (__v2df) _mm_setzero_pd ());
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_mask_mov_pd (__m256d __W, __mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
@@ -8131,7 +8132,7 @@
               (__v4df) __W);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maskz_mov_pd (__mmask8 __U, __m256d __A)
 {
   return (__m256d) __builtin_ia32_selectpd_256 ((__mmask8) __U,
@@ -8139,7 +8140,7 @@
               (__v4df) _mm256_setzero_pd ());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_mov_ps (__m128 __W, __mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
@@ -8147,7 +8148,7 @@
              (__v4sf) __W);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_mov_ps (__mmask8 __U, __m128 __A)
 {
   return (__m128) __builtin_ia32_selectps_128 ((__mmask8) __U,
@@ -8155,7 +8156,7 @@
              (__v4sf) _mm_setzero_ps ());
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_mov_ps (__m256 __W, __mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
@@ -8163,7 +8164,7 @@
              (__v8sf) __W);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_mov_ps (__mmask8 __U, __m256 __A)
 {
   return (__m256) __builtin_ia32_selectps_256 ((__mmask8) __U,
@@ -8171,7 +8172,7 @@
              (__v8sf) _mm256_setzero_ps ());
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_mask_cvtph_ps (__m128 __W, __mmask8 __U, __m128i __A)
 {
   return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
@@ -8179,7 +8180,7 @@
              (__mmask8) __U);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
 {
   return (__m128) __builtin_ia32_vcvtph2ps_mask ((__v8hi) __A,
@@ -8188,7 +8189,7 @@
              (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtph_ps (__m256 __W, __mmask8 __U, __m128i __A)
 {
   return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
@@ -8196,7 +8197,7 @@
                 (__mmask8) __U);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtph_ps (__mmask8 __U, __m128i __A)
 {
   return (__m256) __builtin_ia32_vcvtph2ps256_mask ((__v8hi) __A,
@@ -8205,7 +8206,7 @@
                 (__mmask8) __U);
 }
 
-static __inline __m128i __DEFAULT_FN_ATTRS
+static __inline __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
@@ -8213,7 +8214,7 @@
                                                   (__mmask8) __U);
 }
 
-static __inline __m128i __DEFAULT_FN_ATTRS
+static __inline __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_cvtps_ph (__mmask8 __U, __m128 __A)
 {
   return (__m128i) __builtin_ia32_vcvtps2ph_mask ((__v4sf) __A, _MM_FROUND_CUR_DIRECTION,
@@ -8231,7 +8232,7 @@
                                          (__v8hi)_mm_setzero_si128(), \
                                          (__mmask8)(U))
 
-static __inline __m128i __DEFAULT_FN_ATTRS
+static __inline __m128i __DEFAULT_FN_ATTRS256
 _mm256_mask_cvtps_ph (__m128i __W, __mmask8 __U, __m256 __A)
 {
   return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
@@ -8239,7 +8240,7 @@
                                                       (__mmask8) __U);
 }
 
-static __inline __m128i __DEFAULT_FN_ATTRS
+static __inline __m128i __DEFAULT_FN_ATTRS256
 _mm256_maskz_cvtps_ph ( __mmask8 __U, __m256 __A)
 {
   return (__m128i) __builtin_ia32_vcvtps2ph256_mask ((__v8sf) __A, _MM_FROUND_CUR_DIRECTION,
@@ -8257,6 +8258,7 @@
                                             (__mmask8)(U))
 
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif /* __AVX512VLINTRIN_H */
Index: lib/Headers/avx512vlvbmi2intrin.h
===================================================================
--- lib/Headers/avx512vlvbmi2intrin.h
+++ lib/Headers/avx512vlvbmi2intrin.h
@@ -29,9 +29,10 @@
 #define __AVX512VLVBMI2INTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2")))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vbmi2"), __min_vector_width__(256)))
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_compress_epi16(__m128i __S, __mmask8 __U, __m128i __D)
 {
   return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
@@ -39,7 +40,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_compress_epi16(__mmask8 __U, __m128i __D)
 {
   return (__m128i) __builtin_ia32_compresshi128_mask ((__v8hi) __D,
@@ -47,7 +48,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_compress_epi8(__m128i __S, __mmask16 __U, __m128i __D)
 {
   return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
@@ -55,7 +56,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_compress_epi8(__mmask16 __U, __m128i __D)
 {
   return (__m128i) __builtin_ia32_compressqi128_mask ((__v16qi) __D,
@@ -63,21 +64,21 @@
               __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_compressstoreu_epi16(void *__P, __mmask8 __U, __m128i __D)
 {
   __builtin_ia32_compressstorehi128_mask ((__v8hi *) __P, (__v8hi) __D,
               __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS128
 _mm_mask_compressstoreu_epi8(void *__P, __mmask16 __U, __m128i __D)
 {
   __builtin_ia32_compressstoreqi128_mask ((__v16qi *) __P, (__v16qi) __D,
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_expand_epi16(__m128i __S, __mmask8 __U, __m128i __D)
 {
   return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
@@ -85,7 +86,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_expand_epi16(__mmask8 __U, __m128i __D)
 {
   return (__m128i) __builtin_ia32_expandhi128_mask ((__v8hi) __D,
@@ -93,7 +94,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_expand_epi8(__m128i __S, __mmask16 __U, __m128i __D)
 {
   return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
@@ -101,7 +102,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_expand_epi8(__mmask16 __U, __m128i __D)
 {
   return (__m128i) __builtin_ia32_expandqi128_mask ((__v16qi) __D,
@@ -109,7 +110,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_expandloadu_epi16(__m128i __S, __mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
@@ -117,7 +118,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_expandloadu_epi16(__mmask8 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_expandloadhi128_mask ((const __v8hi *)__P,
@@ -125,7 +126,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_expandloadu_epi8(__m128i __S, __mmask16 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
@@ -133,7 +134,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_expandloadu_epi8(__mmask16 __U, void const *__P)
 {
   return (__m128i) __builtin_ia32_expandloadqi128_mask ((const __v16qi *)__P,
@@ -141,7 +142,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_compress_epi16(__m256i __S, __mmask16 __U, __m256i __D)
 {
   return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
@@ -149,7 +150,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_compress_epi16(__mmask16 __U, __m256i __D)
 {
   return (__m256i) __builtin_ia32_compresshi256_mask ((__v16hi) __D,
@@ -157,7 +158,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_compress_epi8(__m256i __S, __mmask32 __U, __m256i __D)
 {
   return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
@@ -165,7 +166,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_compress_epi8(__mmask32 __U, __m256i __D)
 {
   return (__m256i) __builtin_ia32_compressqi256_mask ((__v32qi) __D,
@@ -173,21 +174,21 @@
               __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_compressstoreu_epi16(void *__P, __mmask16 __U, __m256i __D)
 {
   __builtin_ia32_compressstorehi256_mask ((__v16hi *) __P, (__v16hi) __D,
               __U);
 }
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __DEFAULT_FN_ATTRS256
 _mm256_mask_compressstoreu_epi8(void *__P, __mmask32 __U, __m256i __D)
 {
   __builtin_ia32_compressstoreqi256_mask ((__v32qi *) __P, (__v32qi) __D,
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_expand_epi16(__m256i __S, __mmask16 __U, __m256i __D)
 {
   return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
@@ -195,7 +196,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_expand_epi16(__mmask16 __U, __m256i __D)
 {
   return (__m256i) __builtin_ia32_expandhi256_mask ((__v16hi) __D,
@@ -203,7 +204,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_expand_epi8(__m256i __S, __mmask32 __U, __m256i __D)
 {
   return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
@@ -211,7 +212,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_expand_epi8(__mmask32 __U, __m256i __D)
 {
   return (__m256i) __builtin_ia32_expandqi256_mask ((__v32qi) __D,
@@ -219,7 +220,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_expandloadu_epi16(__m256i __S, __mmask16 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
@@ -227,7 +228,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_expandloadu_epi16(__mmask16 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_expandloadhi256_mask ((const __v16hi *)__P,
@@ -235,7 +236,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_expandloadu_epi8(__m256i __S, __mmask32 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
@@ -243,7 +244,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_expandloadu_epi8(__mmask32 __U, void const *__P)
 {
   return (__m256i) __builtin_ia32_expandloadqi256_mask ((const __v32qi *)__P,
@@ -419,7 +420,7 @@
                                       (__v8hi)_mm_shrdi_epi16((A), (B), (I)), \
                                       (__v8hi)_mm_setzero_si128())
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_shldv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
@@ -428,7 +429,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_shldv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshldvq256_maskz ((__v4di) __S,
@@ -437,7 +438,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shldv_epi64(__m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshldvq256_mask ((__v4di) __S,
@@ -446,7 +447,7 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_shldv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
@@ -455,7 +456,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_shldv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshldvq128_maskz ((__v2di) __S,
@@ -464,7 +465,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_shldv_epi64(__m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshldvq128_mask ((__v2di) __S,
@@ -473,7 +474,7 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_shldv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
@@ -482,7 +483,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_shldv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshldvd256_maskz ((__v8si) __S,
@@ -491,7 +492,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shldv_epi32(__m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshldvd256_mask ((__v8si) __S,
@@ -500,7 +501,7 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_shldv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
@@ -509,7 +510,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_shldv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshldvd128_maskz ((__v4si) __S,
@@ -518,7 +519,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_shldv_epi32(__m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshldvd128_mask ((__v4si) __S,
@@ -527,7 +528,7 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_shldv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
@@ -536,7 +537,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_shldv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshldvw256_maskz ((__v16hi) __S,
@@ -545,7 +546,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shldv_epi16(__m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshldvw256_mask ((__v16hi) __S,
@@ -554,7 +555,7 @@
               (__mmask16) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_shldv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
@@ -563,7 +564,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_shldv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshldvw128_maskz ((__v8hi) __S,
@@ -572,7 +573,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_shldv_epi16(__m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshldvw128_mask ((__v8hi) __S,
@@ -581,7 +582,7 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_shrdv_epi64(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
@@ -590,7 +591,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_shrdv_epi64(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshrdvq256_maskz ((__v4di) __S,
@@ -599,7 +600,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shrdv_epi64(__m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshrdvq256_mask ((__v4di) __S,
@@ -608,7 +609,7 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_shrdv_epi64(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
@@ -617,7 +618,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_shrdv_epi64(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshrdvq128_maskz ((__v2di) __S,
@@ -626,7 +627,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_shrdv_epi64(__m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshrdvq128_mask ((__v2di) __S,
@@ -635,7 +636,7 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_shrdv_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
@@ -644,7 +645,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_shrdv_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshrdvd256_maskz ((__v8si) __S,
@@ -653,7 +654,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shrdv_epi32(__m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshrdvd256_mask ((__v8si) __S,
@@ -662,7 +663,7 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_shrdv_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
@@ -671,7 +672,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_shrdv_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshrdvd128_maskz ((__v4si) __S,
@@ -680,7 +681,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_shrdv_epi32(__m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshrdvd128_mask ((__v4si) __S,
@@ -689,7 +690,7 @@
               (__mmask8) -1);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_shrdv_epi16(__m256i __S, __mmask16 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
@@ -698,7 +699,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_shrdv_epi16(__mmask16 __U, __m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshrdvw256_maskz ((__v16hi) __S,
@@ -707,7 +708,7 @@
               __U);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_shrdv_epi16(__m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_vpshrdvw256_mask ((__v16hi) __S,
@@ -716,7 +717,7 @@
               (__mmask16) -1);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_shrdv_epi16(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
@@ -725,7 +726,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_shrdv_epi16(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshrdvw128_maskz ((__v8hi) __S,
@@ -734,7 +735,7 @@
               __U);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_shrdv_epi16(__m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_vpshrdvw128_mask ((__v8hi) __S,
@@ -744,6 +745,7 @@
 }
 
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif
Index: lib/Headers/avx512vlvnniintrin.h
===================================================================
--- lib/Headers/avx512vlvnniintrin.h
+++ lib/Headers/avx512vlvnniintrin.h
@@ -29,17 +29,18 @@
 #define __AVX512VLVNNIINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni")))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512vnni"), __min_vector_width__(256)))
 
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_dpbusd_epi32(__m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_vpdpbusd256((__v8si)__S, (__v8si)__A,
                                              (__v8si)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_dpbusd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -47,7 +48,7 @@
                                      (__v8si)__S);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_dpbusd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -55,14 +56,14 @@
                                      (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_dpbusds_epi32(__m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_vpdpbusds256((__v8si)__S, (__v8si)__A,
                                               (__v8si)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_dpbusds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -70,7 +71,7 @@
                                     (__v8si)__S);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_dpbusds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -78,14 +79,14 @@
                                      (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_dpwssd_epi32(__m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_vpdpwssd256((__v8si)__S, (__v8si)__A,
                                              (__v8si)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_dpwssd_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -93,7 +94,7 @@
                                      (__v8si)__S);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_dpwssd_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -101,14 +102,14 @@
                                      (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_dpwssds_epi32(__m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_vpdpwssds256((__v8si)__S, (__v8si)__A,
                                               (__v8si)__B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_dpwssds_epi32(__m256i __S, __mmask8 __U, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -116,7 +117,7 @@
                                     (__v8si)__S);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_dpwssds_epi32(__mmask8 __U, __m256i __S, __m256i __A, __m256i __B)
 {
   return (__m256i)__builtin_ia32_selectd_256(__U,
@@ -124,14 +125,14 @@
                                     (__v8si)_mm256_setzero_si256());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_dpbusd_epi32(__m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpdpbusd128((__v4si)__S, (__v4si)__A,
                                              (__v4si)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_dpbusd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -139,7 +140,7 @@
                                         (__v4si)__S);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_dpbusd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -147,14 +148,14 @@
                                         (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_dpbusds_epi32(__m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpdpbusds128((__v4si)__S, (__v4si)__A,
                                               (__v4si)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_dpbusds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -162,7 +163,7 @@
                                        (__v4si)__S);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_dpbusds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -170,14 +171,14 @@
                                        (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_dpwssd_epi32(__m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpdpwssd128((__v4si)__S, (__v4si)__A,
                                              (__v4si)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_dpwssd_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -185,7 +186,7 @@
                                         (__v4si)__S);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_dpwssd_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -193,14 +194,14 @@
                                         (__v4si)_mm_setzero_si128());
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_dpwssds_epi32(__m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_vpdpwssds128((__v4si)__S, (__v4si)__A,
                                               (__v4si)__B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_dpwssds_epi32(__m128i __S, __mmask8 __U, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -208,7 +209,7 @@
                                        (__v4si)__S);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_dpwssds_epi32(__mmask8 __U, __m128i __S, __m128i __A, __m128i __B)
 {
   return (__m128i)__builtin_ia32_selectd_128(__U,
@@ -216,6 +217,7 @@
                                        (__v4si)_mm_setzero_si128());
 }
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif
Index: lib/Headers/avx512vnniintrin.h
===================================================================
--- lib/Headers/avx512vnniintrin.h
+++ lib/Headers/avx512vnniintrin.h
@@ -29,7 +29,7 @@
 #define __AVX512VNNIINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vnni"), __min_vector_width__(512)))
 
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS
Index: lib/Headers/avx512vpopcntdqintrin.h
===================================================================
--- lib/Headers/avx512vpopcntdqintrin.h
+++ lib/Headers/avx512vpopcntdqintrin.h
@@ -31,8 +31,7 @@
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntd"   \
-                                                            "q")))
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq"), __min_vector_width__(512)))
 
 static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) {
   return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A);
Index: lib/Headers/avx512vpopcntdqvlintrin.h
===================================================================
--- lib/Headers/avx512vpopcntdqvlintrin.h
+++ lib/Headers/avx512vpopcntdqvlintrin.h
@@ -30,69 +30,76 @@
 #define __AVX512VPOPCNTDQVLINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS                                                     \
-  __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl")))
+#define __DEFAULT_FN_ATTRS128                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256                                                  \
+  __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntdq,avx512vl"), __min_vector_width__(256)))
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi64(__m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_popcnt_epi64(__m128i __A) {
   return (__m128i)__builtin_ia32_vpopcntq_128((__v2di)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_popcnt_epi64(__m128i __W, __mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectq_128(
       (__mmask8)__U, (__v2di)_mm_popcnt_epi64(__A), (__v2di)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_popcnt_epi64(__mmask8 __U, __m128i __A) {
   return _mm_mask_popcnt_epi64((__m128i)_mm_setzero_si128(), __U, __A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_popcnt_epi32(__m128i __A) {
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
+_mm_popcnt_epi32(__m128i __A) {
   return (__m128i)__builtin_ia32_vpopcntd_128((__v4si)__A);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_mask_popcnt_epi32(__m128i __W, __mmask8 __U, __m128i __A) {
   return (__m128i)__builtin_ia32_selectd_128(
       (__mmask8)__U, (__v4si)_mm_popcnt_epi32(__A), (__v4si)__W);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS
+static __inline__ __m128i __DEFAULT_FN_ATTRS128
 _mm_maskz_popcnt_epi32(__mmask8 __U, __m128i __A) {
   return _mm_mask_popcnt_epi32((__m128i)_mm_setzero_si128(), __U, __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi64(__m256i __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_popcnt_epi64(__m256i __A) {
   return (__m256i)__builtin_ia32_vpopcntq_256((__v4di)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_popcnt_epi64(__m256i __W, __mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectq_256(
       (__mmask8)__U, (__v4di)_mm256_popcnt_epi64(__A), (__v4di)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_popcnt_epi64(__mmask8 __U, __m256i __A) {
   return _mm256_mask_popcnt_epi64((__m256i)_mm256_setzero_si256(), __U, __A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS _mm256_popcnt_epi32(__m256i __A) {
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_popcnt_epi32(__m256i __A) {
   return (__m256i)__builtin_ia32_vpopcntd_256((__v8si)__A);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_mask_popcnt_epi32(__m256i __W, __mmask8 __U, __m256i __A) {
   return (__m256i)__builtin_ia32_selectd_256(
       (__mmask8)__U, (__v8si)_mm256_popcnt_epi32(__A), (__v8si)__W);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_maskz_popcnt_epi32(__mmask8 __U, __m256i __A) {
   return _mm256_mask_popcnt_epi32((__m256i)_mm256_setzero_si256(), __U, __A);
 }
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif
Index: lib/Headers/avxintrin.h
===================================================================
--- lib/Headers/avxintrin.h
+++ lib/Headers/avxintrin.h
@@ -50,7 +50,8 @@
 typedef long long __m256i __attribute__((__vector_size__(32)));
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(256)))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("avx"), __min_vector_width__(128)))
 
 /* Arithmetic */
 /// Adds two 256-bit vectors of [4 x double].
@@ -780,7 +781,7 @@
 ///      1: Bits [127:64] of the source are copied to bits [127:64] of the
 ///         returned vector.
 /// \returns A 128-bit vector of [2 x double] containing the copied values.
-static __inline __m128d __DEFAULT_FN_ATTRS
+static __inline __m128d __DEFAULT_FN_ATTRS128
 _mm_permutevar_pd(__m128d __a, __m128i __c)
 {
   return (__m128d)__builtin_ia32_vpermilvarpd((__v2df)__a, (__v2di)__c);
@@ -873,7 +874,7 @@
 ///      11: Bits [127:96] of the source are copied to bits [127:96] of the
 ///          returned vector.
 /// \returns A 128-bit vector of [4 x float] containing the copied values.
-static __inline __m128 __DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS128
 _mm_permutevar_ps(__m128 __a, __m128i __c)
 {
   return (__m128)__builtin_ia32_vpermilvarps((__v4sf)__a, (__v4si)__c);
@@ -2497,7 +2498,7 @@
 /// \param __b
 ///    A 128-bit vector of [2 x double].
 /// \returns the ZF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS128
 _mm_testz_pd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_vtestzpd((__v2df)__a, (__v2df)__b);
@@ -2526,7 +2527,7 @@
 /// \param __b
 ///    A 128-bit vector of [2 x double].
 /// \returns the CF flag in the EFLAGS register.
-static __inline int __DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS128
 _mm_testc_pd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_vtestcpd((__v2df)__a, (__v2df)__b);
@@ -2556,7 +2557,7 @@
 /// \param __b
 ///    A 128-bit vector of [2 x double].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS128
 _mm_testnzc_pd(__m128d __a, __m128d __b)
 {
   return __builtin_ia32_vtestnzcpd((__v2df)__a, (__v2df)__b);
@@ -2585,7 +2586,7 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float].
 /// \returns the ZF flag.
-static __inline int __DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS128
 _mm_testz_ps(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_vtestzps((__v4sf)__a, (__v4sf)__b);
@@ -2614,7 +2615,7 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float].
 /// \returns the CF flag.
-static __inline int __DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS128
 _mm_testc_ps(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_vtestcps((__v4sf)__a, (__v4sf)__b);
@@ -2644,7 +2645,7 @@
 /// \param __b
 ///    A 128-bit vector of [4 x float].
 /// \returns 1 if both the ZF and CF flags are set to 0, otherwise returns 0.
-static __inline int __DEFAULT_FN_ATTRS
+static __inline int __DEFAULT_FN_ATTRS128
 _mm_testnzc_ps(__m128 __a, __m128 __b)
 {
   return __builtin_ia32_vtestnzcps((__v4sf)__a, (__v4sf)__b);
@@ -2948,7 +2949,7 @@
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VZEROALL </c> instruction.
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __attribute__((__always_inline__, __nodebug__, __target__("avx")))
 _mm256_zeroall(void)
 {
   __builtin_ia32_vzeroall();
@@ -2959,7 +2960,7 @@
 /// \headerfile <x86intrin.h>
 ///
 /// This intrinsic corresponds to the <c> VZEROUPPER </c> instruction.
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __attribute__((__always_inline__, __nodebug__, __target__("avx")))
 _mm256_zeroupper(void)
 {
   __builtin_ia32_vzeroupper();
@@ -2978,7 +2979,7 @@
 ///    The single-precision floating point value to be broadcast.
 /// \returns A 128-bit vector of [4 x float] whose 32-bit elements are set
 ///    equal to the broadcast value.
-static __inline __m128 __DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS128
 _mm_broadcast_ss(float const *__a)
 {
   float __f = *__a;
@@ -3327,7 +3328,7 @@
 ///    corresponding value in the memory location is not loaded and the
 ///    corresponding field in the return value is set to zero.
 /// \returns A 128-bit vector of [2 x double] containing the loaded values.
-static __inline __m128d __DEFAULT_FN_ATTRS
+static __inline __m128d __DEFAULT_FN_ATTRS128
 _mm_maskload_pd(double const *__p, __m128i __m)
 {
   return (__m128d)__builtin_ia32_maskloadpd((const __v2df *)__p, (__v2di)__m);
@@ -3376,7 +3377,7 @@
 ///    corresponding value in the memory location is not loaded and the
 ///    corresponding field in the return value is set to zero.
 /// \returns A 128-bit vector of [4 x float] containing the loaded values.
-static __inline __m128 __DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS128
 _mm_maskload_ps(float const *__p, __m128i __m)
 {
   return (__m128)__builtin_ia32_maskloadps((const __v4sf *)__p, (__v4si)__m);
@@ -3449,7 +3450,7 @@
 ///    changed.
 /// \param __a
 ///    A 128-bit vector of [2 x double] containing the values to be stored.
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS128
 _mm_maskstore_pd(double *__p, __m128i __m, __m128d __a)
 {
   __builtin_ia32_maskstorepd((__v2df *)__p, (__v2di)__m, (__v2df)__a);
@@ -3497,7 +3498,7 @@
 ///    changed.
 /// \param __a
 ///    A 128-bit vector of [4 x float] containing the values to be stored.
-static __inline void __DEFAULT_FN_ATTRS
+static __inline void __DEFAULT_FN_ATTRS128
 _mm_maskstore_ps(float *__p, __m128i __m, __m128 __a)
 {
   __builtin_ia32_maskstoreps((__v4sf *)__p, (__v4si)__m, (__v4sf)__a);
@@ -5057,5 +5058,6 @@
 }
 
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
 
 #endif /* __AVXINTRIN_H */
Index: lib/Headers/emmintrin.h
===================================================================
--- lib/Headers/emmintrin.h
+++ lib/Headers/emmintrin.h
@@ -45,8 +45,8 @@
 typedef signed char __v16qs __attribute__((__vector_size__(16)));
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
-#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse2"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse2"), __min_vector_width__(64)))
 
 /// Adds lower double-precision values in both operands and returns the
 ///    sum in the lower 64 bits of the result. The upper 64 bits of the result
@@ -4093,7 +4093,7 @@
 ///    A pointer to the 32-bit memory location used to store the value.
 /// \param __a
 ///    A 32-bit integer containing the value to be stored.
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
 _mm_stream_si32(int *__p, int __a)
 {
   __builtin_ia32_movnti(__p, __a);
@@ -4113,7 +4113,7 @@
 ///    A pointer to the 64-bit memory location used to store the value.
 /// \param __a
 ///    A 64-bit integer containing the value to be stored.
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("sse2")))
 _mm_stream_si64(long long *__p, long long __a)
 {
   __builtin_ia32_movnti64(__p, __a);
Index: lib/Headers/f16cintrin.h
===================================================================
--- lib/Headers/f16cintrin.h
+++ lib/Headers/f16cintrin.h
@@ -29,8 +29,10 @@
 #define __F16CINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS \
-  __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
+#define __DEFAULT_FN_ATTRS128 \
+  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 \
+  __attribute__((__always_inline__, __nodebug__, __target__("f16c"), __min_vector_width__(256)))
 
 /* NOTE: Intel documents the 128-bit versions of these as being in emmintrin.h,
  * but that's because icc can emulate these without f16c using a library call.
@@ -47,7 +49,7 @@
 /// \param __a
 ///    A 16-bit half-precision float value.
 /// \returns The converted 32-bit float value.
-static __inline float __DEFAULT_FN_ATTRS
+static __inline float __DEFAULT_FN_ATTRS128
 _cvtsh_ss(unsigned short __a)
 {
   __v8hi v = {(short)__a, 0, 0, 0, 0, 0, 0, 0};
@@ -118,7 +120,7 @@
 ///    A 128-bit vector containing 16-bit half-precision float values. The lower
 ///    64 bits are used in the conversion.
 /// \returns A 128-bit vector of [4 x float] containing converted float values.
-static __inline __m128 __DEFAULT_FN_ATTRS
+static __inline __m128 __DEFAULT_FN_ATTRS128
 _mm_cvtph_ps(__m128i __a)
 {
   return (__m128)__builtin_ia32_vcvtph2ps((__v8hi)__a);
@@ -162,12 +164,13 @@
 ///    converted to 32-bit single-precision float values.
 /// \returns A vector of [8 x float] containing the converted 32-bit
 ///    single-precision float values.
-static __inline __m256 __attribute__((__always_inline__, __nodebug__, __target__("f16c")))
+static __inline __m256 __DEFAULT_FN_ATTRS256
 _mm256_cvtph_ps(__m128i __a)
 {
   return (__m256)__builtin_ia32_vcvtph2ps256((__v8hi)__a);
 }
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif /* __F16CINTRIN_H */
Index: lib/Headers/fma4intrin.h
===================================================================
--- lib/Headers/fma4intrin.h
+++ lib/Headers/fma4intrin.h
@@ -31,200 +31,202 @@
 #include <pmmintrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma4")))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma4"), __min_vector_width__(256)))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_macc_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_macc_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_macc_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_macc_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_msub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_msub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_msub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddss((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_msub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_nmacc_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_nmacc_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_nmacc_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_nmacc_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_nmsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_nmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_nmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddss(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_nmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_maddsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_maddsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_msubadd_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_msubadd_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_macc_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_macc_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_msub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_msub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_nmacc_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_nmacc_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_nmsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_nmsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_maddsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_maddsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_msubadd_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_msubadd_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif /* __FMA4INTRIN_H */
Index: lib/Headers/fmaintrin.h
===================================================================
--- lib/Headers/fmaintrin.h
+++ lib/Headers/fmaintrin.h
@@ -29,200 +29,202 @@
 #define __FMAINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("fma")))
+#define __DEFAULT_FN_ATTRS128 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("fma"), __min_vector_width__(256)))
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_fmadd_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_fmadd_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_fmadd_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_fmadd_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_fmsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_fmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_fmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_fmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_fnmadd_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_fnmadd_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_fnmadd_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, (__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_fnmadd_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, (__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_fnmsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddps(-(__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_fnmsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddpd(-(__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_fnmsub_ss(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddss3((__v4sf)__A, -(__v4sf)__B, -(__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_fnmsub_sd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsd3((__v2df)__A, -(__v2df)__B, -(__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_fmaddsub_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, (__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_fmaddsub_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, (__v2df)__C);
 }
 
-static __inline__ __m128 __DEFAULT_FN_ATTRS
+static __inline__ __m128 __DEFAULT_FN_ATTRS128
 _mm_fmsubadd_ps(__m128 __A, __m128 __B, __m128 __C)
 {
   return (__m128)__builtin_ia32_vfmaddsubps((__v4sf)__A, (__v4sf)__B, -(__v4sf)__C);
 }
 
-static __inline__ __m128d __DEFAULT_FN_ATTRS
+static __inline__ __m128d __DEFAULT_FN_ATTRS128
 _mm_fmsubadd_pd(__m128d __A, __m128d __B, __m128d __C)
 {
   return (__m128d)__builtin_ia32_vfmaddsubpd((__v2df)__A, (__v2df)__B, -(__v2df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_fmadd_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_fmadd_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_fmsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_fmsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_fnmadd_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_fnmadd_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, (__v4df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_fnmsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddps256(-(__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_fnmsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddpd256(-(__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_fmaddsub_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, (__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_fmaddsub_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, (__v4df)__C);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_fmsubadd_ps(__m256 __A, __m256 __B, __m256 __C)
 {
   return (__m256)__builtin_ia32_vfmaddsubps256((__v8sf)__A, (__v8sf)__B, -(__v8sf)__C);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_fmsubadd_pd(__m256d __A, __m256d __B, __m256d __C)
 {
   return (__m256d)__builtin_ia32_vfmaddsubpd256((__v4df)__A, (__v4df)__B, -(__v4df)__C);
 }
 
-#undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS128
+#undef __DEFAULT_FN_ATTRS256
 
 #endif /* __FMAINTRIN_H */
Index: lib/Headers/gfniintrin.h
===================================================================
--- lib/Headers/gfniintrin.h
+++ lib/Headers/gfniintrin.h
@@ -120,16 +120,17 @@
         U, A, B, I)
 
 /* Default attributes for simple form (no masking). */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"), __min_vector_width__(128)))
 
 /* Default attributes for YMM unmasked form. */
-#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni")))
+#define __DEFAULT_FN_ATTRS_Y __attribute__((__always_inline__, __nodebug__, __target__("avx,gfni"), __min_vector_width__(256)))
 
 /* Default attributes for ZMM forms. */
-#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni")))
+#define __DEFAULT_FN_ATTRS_Z __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"), __min_vector_width__(512)))
 
 /* Default attributes for VLX forms. */
-#define __DEFAULT_FN_ATTRS_VL __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni")))
+#define __DEFAULT_FN_ATTRS_VL128 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS_VL256 __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"), __min_vector_width__(256)))
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_gf2p8mul_epi8(__m128i __A, __m128i __B)
@@ -138,7 +139,7 @@
               (__v16qi) __B);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS_VL
+static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128
 _mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B)
 {
   return (__m128i) __builtin_ia32_selectb_128(__U,
@@ -146,7 +147,7 @@
               (__v16qi) __S);
 }
 
-static __inline__ __m128i __DEFAULT_FN_ATTRS_VL
+static __inline__ __m128i __DEFAULT_FN_ATTRS_VL128
 _mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B)
 {
   return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(),
@@ -160,7 +161,7 @@
               (__v32qi) __B);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS_VL
+static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256
 _mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B)
 {
   return (__m256i) __builtin_ia32_selectb_256(__U,
@@ -168,7 +169,7 @@
               (__v32qi) __S);
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS_VL
+static __inline__ __m256i __DEFAULT_FN_ATTRS_VL256
 _mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B)
 {
   return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(),
@@ -200,7 +201,8 @@
 #undef __DEFAULT_FN_ATTRS
 #undef __DEFAULT_FN_ATTRS_Y
 #undef __DEFAULT_FN_ATTRS_Z
-#undef __DEFAULT_FN_ATTRS_VL
+#undef __DEFAULT_FN_ATTRS_VL128
+#undef __DEFAULT_FN_ATTRS_VL256
 
 #endif /* __GFNIINTRIN_H */
 
Index: lib/Headers/mm3dnow.h
===================================================================
--- lib/Headers/mm3dnow.h
+++ lib/Headers/mm3dnow.h
@@ -30,9 +30,9 @@
 typedef float __v2sf __attribute__((__vector_size__(8)));
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnow"), __min_vector_width__(64)))
 
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void __attribute__((__always_inline__, __nodebug__, __target__("3dnow")))
 _m_femms(void) {
   __builtin_ia32_femms();
 }
@@ -134,7 +134,7 @@
 
 /* Handle the 3dnowa instructions here. */
 #undef __DEFAULT_FN_ATTRS
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("3dnowa"), __min_vector_width__(64)))
 
 static __inline__ __m64 __DEFAULT_FN_ATTRS
 _m_pf2iw(__m64 __m) {
Index: lib/Headers/mmintrin.h
===================================================================
--- lib/Headers/mmintrin.h
+++ lib/Headers/mmintrin.h
@@ -32,7 +32,7 @@
 typedef char __v8qi __attribute__((__vector_size__(8)));
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("mmx"), __min_vector_width__(64)))
 
 /// Clears the MMX state by setting the state of the x87 stack registers
 ///    to empty.
@@ -41,7 +41,7 @@
 ///
 /// This intrinsic corresponds to the <c> EMMS </c> instruction.
 ///
-static __inline__ void __DEFAULT_FN_ATTRS
+static __inline__ void  __attribute__((__always_inline__, __nodebug__, __target__("mmx")))
 _mm_empty(void)
 {
     __builtin_ia32_emms();
Index: lib/Headers/pmmintrin.h
===================================================================
--- lib/Headers/pmmintrin.h
+++ lib/Headers/pmmintrin.h
@@ -28,7 +28,7 @@
 
 /* Define the default attributes for the functions in this file. */
 #define __DEFAULT_FN_ATTRS \
-  __attribute__((__always_inline__, __nodebug__, __target__("sse3")))
+  __attribute__((__always_inline__, __nodebug__, __target__("sse3"), __min_vector_width__(128)))
 
 /// Loads data from an unaligned memory location to elements in a 128-bit
 ///    vector.
Index: lib/Headers/shaintrin.h
===================================================================
--- lib/Headers/shaintrin.h
+++ lib/Headers/shaintrin.h
@@ -29,7 +29,7 @@
 #define __SHAINTRIN_H
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sha"), __min_vector_width__(128)))
 
 #define _mm_sha1rnds4_epu32(V1, V2, M) \
   __builtin_ia32_sha1rnds4((__v4si)(__m128i)(V1), (__v4si)(__m128i)(V2), (M))
Index: lib/Headers/smmintrin.h
===================================================================
--- lib/Headers/smmintrin.h
+++ lib/Headers/smmintrin.h
@@ -27,7 +27,7 @@
 #include <tmmintrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse4.1"), __min_vector_width__(128)))
 
 /* SSE4 Rounding macros. */
 #define _MM_FROUND_TO_NEAREST_INT    0x00
Index: lib/Headers/tmmintrin.h
===================================================================
--- lib/Headers/tmmintrin.h
+++ lib/Headers/tmmintrin.h
@@ -27,8 +27,8 @@
 #include <pmmintrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3")))
-#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("ssse3"), __min_vector_width__(64)))
+#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,ssse3"), __min_vector_width__(64)))
 
 /// Computes the absolute value of each of the packed 8-bit signed
 ///    integers in the source operand and stores the 8-bit unsigned integer
Index: lib/Headers/vaesintrin.h
===================================================================
--- lib/Headers/vaesintrin.h
+++ lib/Headers/vaesintrin.h
@@ -29,10 +29,10 @@
 #define __VAESINTRIN_H
 
 /* Default attributes for YMM forms. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"), __min_vector_width__(256)))
 
 /* Default attributes for ZMM forms. */
-#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes")))
+#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"), __min_vector_width__(512)))
 
 
 static __inline__ __m256i __DEFAULT_FN_ATTRS
Index: lib/Headers/xmmintrin.h
===================================================================
--- lib/Headers/xmmintrin.h
+++ lib/Headers/xmmintrin.h
@@ -40,8 +40,8 @@
 #endif
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse")))
-#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("sse"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS_MMX __attribute__((__always_inline__, __nodebug__, __target__("mmx,sse"), __min_vector_width__(64)))
 
 /// Adds the 32-bit float values in the low-order bits of the operands.
 ///
Index: lib/Headers/xopintrin.h
===================================================================
--- lib/Headers/xopintrin.h
+++ lib/Headers/xopintrin.h
@@ -31,7 +31,8 @@
 #include <fma4intrin.h>
 
 /* Define the default attributes for the functions in this file. */
-#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop")))
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(128)))
+#define __DEFAULT_FN_ATTRS256 __attribute__((__always_inline__, __nodebug__, __target__("xop"), __min_vector_width__(256)))
 
 static __inline__ __m128i __DEFAULT_FN_ATTRS
 _mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
@@ -201,7 +202,7 @@
   return (__m128i)(((__v2du)__A & (__v2du)__C) | ((__v2du)__B & ~(__v2du)__C));
 }
 
-static __inline__ __m256i __DEFAULT_FN_ATTRS
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
 _mm256_cmov_si256(__m256i __A, __m256i __B, __m256i __C)
 {
   return (__m256i)(((__v4du)__A & (__v4du)__C) | ((__v4du)__B & ~(__v4du)__C));
@@ -765,18 +766,19 @@
   return (__m128d)__builtin_ia32_vfrczpd((__v2df)__A);
 }
 
-static __inline__ __m256 __DEFAULT_FN_ATTRS
+static __inline__ __m256 __DEFAULT_FN_ATTRS256
 _mm256_frcz_ps(__m256 __A)
 {
   return (__m256)__builtin_ia32_vfrczps256((__v8sf)__A);
 }
 
-static __inline__ __m256d __DEFAULT_FN_ATTRS
+static __inline__ __m256d __DEFAULT_FN_ATTRS256
 _mm256_frcz_pd(__m256d __A)
 {
   return (__m256d)__builtin_ia32_vfrczpd256((__v4df)__A);
 }
 
 #undef __DEFAULT_FN_ATTRS
+#undef __DEFAULT_FN_ATTRS256
 
 #endif /* __XOPINTRIN_H */
Index: lib/Sema/SemaDeclAttr.cpp
===================================================================
--- lib/Sema/SemaDeclAttr.cpp
+++ lib/Sema/SemaDeclAttr.cpp
@@ -2942,6 +2942,25 @@
   D->addAttr(NewAttr);
 }
 
+static void handleMinVectorWidthAttr(Sema &S, Decl *D, const AttributeList &AL) {
+  Expr *E = AL.getArgAsExpr(0);
+  uint32_t VecWidth;
+  if (!checkUInt32Argument(S, AL, E, VecWidth)) {
+    AL.setInvalid();
+    return;
+  }
+
+  MinVectorWidthAttr *Existing = D->getAttr<MinVectorWidthAttr>();
+  if (Existing && Existing->getVectorWidth() != VecWidth) {
+    S.Diag(AL.getLoc(), diag::warn_duplicate_attribute) << AL.getName();
+    return;
+  }
+
+  D->addAttr(::new (S.Context)
+             MinVectorWidthAttr(AL.getRange(), S.Context, VecWidth,
+                                AL.getAttributeSpellingListIndex()));
+}
+
 static void handleCleanupAttr(Sema &S, Decl *D, const AttributeList &AL) {
   Expr *E = AL.getArgAsExpr(0);
   SourceLocation Loc = E->getExprLoc();
@@ -6130,6 +6149,9 @@
   case AttributeList::AT_Target:
     handleTargetAttr(S, D, AL);
     break;
+  case AttributeList::AT_MinVectorWidth:
+    handleMinVectorWidthAttr(S, D, AL);
+    break;
   case AttributeList::AT_Unavailable:
     handleAttrWithMessage<UnavailableAttr>(S, D, AL);
     break;
Index: test/CodeGen/function-min-vector-width.c
===================================================================
--- /dev/null
+++ test/CodeGen/function-min-vector-width.c
@@ -0,0 +1,7 @@
+// This test verifies that we produce min-legal-vector-width attributes
+
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -emit-llvm -o - %s | FileCheck %s
+
+void __attribute((__min_vector_width__(128))) foo() {}
+
+// CHECK: "min-legal-vector-width"="128"
Index: test/CodeGen/x86-builtins-vector-width.c
===================================================================
--- /dev/null
+++ test/CodeGen/x86-builtins-vector-width.c
@@ -0,0 +1,32 @@
+// RUN: %clang_cc1 -triple i686-linux-gnu -target-cpu i686 -emit-llvm %s -o - | FileCheck %s
+
+typedef signed long long V2LLi __attribute__((vector_size(16)));
+typedef signed long long V4LLi __attribute__((vector_size(32)));
+
+// Make sure builtin forces a min-legal-width attribute
+void foo(void) {
+  V2LLi  tmp_V2LLi;
+
+  tmp_V2LLi = __builtin_ia32_undef128();
+}
+
+// Make sure explicit attribute larger than builtin wins.
+void goo(void) __attribute__((__min_vector_width__(256))) {
+  V2LLi  tmp_V2LLi;
+
+  tmp_V2LLi = __builtin_ia32_undef128();
+}
+
+// Make sure builtin larger than explicit attribute wins.
+void hoo(void) __attribute__((__min_vector_width__(128))) {
+  V4LLi  tmp_V4LLi;
+
+  tmp_V4LLi = __builtin_ia32_undef256();
+}
+
+// CHECK: foo{{.*}} #0
+// CHECK: goo{{.*}} #1
+// CHECK: hoo{{.*}} #1
+
+// CHECK: #0 = {{.*}}"min-legal-vector-width"="128"
+// CHECK: #1 = {{.*}}"min-legal-vector-width"="256"
Index: test/Misc/pragma-attribute-supported-attributes-list.test
===================================================================
--- test/Misc/pragma-attribute-supported-attributes-list.test
+++ test/Misc/pragma-attribute-supported-attributes-list.test
@@ -2,7 +2,7 @@
 
 // The number of supported attributes should never go down!
 
-// CHECK: #pragma clang attribute supports 69 attributes:
+// CHECK: #pragma clang attribute supports 70 attributes:
 // CHECK-NEXT: AMDGPUFlatWorkGroupSize (SubjectMatchRule_function)
 // CHECK-NEXT: AMDGPUNumSGPR (SubjectMatchRule_function)
 // CHECK-NEXT: AMDGPUNumVGPR (SubjectMatchRule_function)
@@ -32,6 +32,7 @@
 // CHECK-NEXT: InternalLinkage (SubjectMatchRule_variable, SubjectMatchRule_function, SubjectMatchRule_record)
 // CHECK-NEXT: LTOVisibilityPublic (SubjectMatchRule_record)
 // CHECK-NEXT: MicroMips (SubjectMatchRule_function)
+// CHECK-NEXT: MinVectorWidth (SubjectMatchRule_function)
 // CHECK-NEXT: MipsLongCall (SubjectMatchRule_function)
 // CHECK-NEXT: MipsShortCall (SubjectMatchRule_function)
 // CHECK-NEXT: NoDebug (SubjectMatchRule_hasType_functionType, SubjectMatchRule_objc_method, SubjectMatchRule_variable_not_is_parameter)
Index: test/Sema/attr-min-vector-width.c
===================================================================
--- /dev/null
+++ test/Sema/attr-min-vector-width.c
@@ -0,0 +1,16 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-unknown -fsyntax-only -verify %s
+
+int i;
+void f(void) __attribute__((__min_vector_width__(i))); /* expected-error {{'__min_vector_width__' attribute requires an integer constant}} */
+
+void f2(void) __attribute__((__min_vector_width__(128)));
+
+void f3(void) __attribute__((__min_vector_width__(128), __min_vector_width__(256))); /* expected-warning {{attribute '__min_vector_width__' is already applied with different parameters}} */
+
+void f4(void) __attribute__((__min_vector_width__())); /* expected-error {{'__min_vector_width__' attribute takes one argument}} */
+
+void f5(void) __attribute__((__min_vector_width__(128, 256))); /* expected-error {{'__min_vector_width__' attribute takes one argument}} */
+
+void f6(void) {
+  int x __attribute__((__min_vector_width__(128))) = 0; /* expected-error {{'__min_vector_width__' attribute only applies to functions}} */
+}
Index: unittests/AST/ASTImporterTest.cpp
===================================================================
--- unittests/AST/ASTImporterTest.cpp
+++ unittests/AST/ASTImporterTest.cpp
@@ -274,7 +274,7 @@
 };
 
 // This class provides generic methods to write tests which can check internal
-// attributes of AST nodes like getPreviousDecl(), isVirtual(), etc.  Also,
+// attributes of AST nodes like getPreviousDecl(), isVirtual(), etc. Also,
 // this fixture makes it possible to import from several "From" contexts.
 class ASTImporterTestBase : public ParameterizedTestsFixture {