Index: include/clang/Basic/BuiltinsX86.def =================================================================== --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -439,6 +439,17 @@ TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "", "vaes") TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes") +// GFNI +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v16qi, "V16cV16cV16cIc", "", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v32qi, "V32cV32cV32cIc", "", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineinvqb_v64qi, "V64cV64cV64cIc", "", "avx512bw,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v16qi, "V16cV16cV16cIc", "", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v32qi, "V32cV32cV32cIc", "", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8affineqb_v64qi, "V64cV64cV64cIc", "", "avx512bw,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v16qi, "V16cV16cV16c", "", "gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v32qi, "V32cV32cV32c", "", "avx,gfni") +TARGET_BUILTIN(__builtin_ia32_vgf2p8mulb_v64qi, "V64cV64cV64c", "", "avx512bw,gfni") + // CLMUL TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "", "pclmul") Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -2511,6 +2511,8 @@ def mno_fsgsbase : Flag<["-"], "mno-fsgsbase">, Group; def mfxsr : Flag<["-"], "mfxsr">, Group; def mno_fxsr : Flag<["-"], "mno-fxsr">, Group; +def mgfni : Flag<["-"], "mgfni">, Group; +def mno_gfni : Flag<["-"], "mno-gfni">, Group; def mlwp : Flag<["-"], "mlwp">, Group; def mno_lwp : Flag<["-"], "mno-lwp">, Group; def mlzcnt : Flag<["-"], "mlzcnt">, Group; Index: lib/Basic/Targets/X86.h =================================================================== --- lib/Basic/Targets/X86.h +++ lib/Basic/Targets/X86.h @@ -50,6 +50,7 @@ bool HasAES = false; bool HasVAES = false; bool HasPCLMUL = false; + bool HasGFNI = false; bool HasLZCNT = false; bool HasRDRND = false; bool HasFSGSBASE = false; Index: lib/Basic/Targets/X86.cpp =================================================================== --- lib/Basic/Targets/X86.cpp +++ lib/Basic/Targets/X86.cpp @@ -133,6 +133,7 @@ case CK_Icelake: setFeatureEnabledImpl(Features, "vaes", true); + setFeatureEnabledImpl(Features, "gfni", true); // TODO: Add icelake features here. LLVM_FALLTHROUGH; case CK_Cannonlake: @@ -444,7 +445,7 @@ LLVM_FALLTHROUGH; case SSE2: Features["sse2"] = Features["pclmul"] = Features["aes"] = Features["sha"] = - false; + Features["gfni"] = false; LLVM_FALLTHROUGH; case SSE3: Features["sse3"] = false; @@ -583,6 +584,9 @@ } else if (Name == "pclmul") { if (Enabled) setSSELevel(Features, SSE2, Enabled); + } else if (Name == "gfni") { + if (Enabled) + setSSELevel(Features, SSE2, Enabled); } else if (Name == "avx") { setSSELevel(Features, AVX, Enabled); } else if (Name == "avx2") { @@ -676,6 +680,8 @@ HasFMA = true; } else if (Feature == "+f16c") { HasF16C = true; + } else if (Feature == "+gfni") { + HasGFNI = true; } else if (Feature == "+avx512cd") { HasAVX512CD = true; } else if (Feature == "+avx512vpopcntdq") { @@ -1009,6 +1015,9 @@ if (HasF16C) Builder.defineMacro("__F16C__"); + if (HasGFNI) + Builder.defineMacro("__GFNI__"); + if (HasAVX512CD) Builder.defineMacro("__AVX512CD__"); if (HasAVX512VPOPCNTDQ) @@ -1172,6 +1181,7 @@ .Case("fma4", true) .Case("fsgsbase", true) .Case("fxsr", true) + .Case("gfni", true) .Case("lwp", true) .Case("lzcnt", true) .Case("mmx", true) @@ -1235,6 +1245,7 @@ .Case("fma4", XOPLevel >= FMA4) .Case("fsgsbase", HasFSGSBASE) .Case("fxsr", HasFXSR) + .Case("gfni", HasGFNI) .Case("ibt", HasIBT) .Case("lwp", HasLWP) .Case("lzcnt", HasLZCNT) Index: lib/Headers/CMakeLists.txt =================================================================== --- lib/Headers/CMakeLists.txt +++ lib/Headers/CMakeLists.txt @@ -42,6 +42,7 @@ fma4intrin.h fmaintrin.h fxsrintrin.h + gfniintrin.h htmintrin.h htmxlintrin.h ia32intrin.h Index: lib/Headers/gfniintrin.h =================================================================== --- lib/Headers/gfniintrin.h +++ lib/Headers/gfniintrin.h @@ -0,0 +1,202 @@ +/*===----------------- gfniintrin.h - GFNI intrinsics ----------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __GFNIINTRIN_H +#define __GFNIINTRIN_H + + +#define _mm_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vgf2p8affineinvqb_v16qi((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), \ + (char)(I)); }) + +#define _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ + (__v16qi)_mm_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v16qi)(__m128i)(S)); }) + + +#define _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m128i)_mm_mask_gf2p8affineinv_epi64_epi8((__m128i)_mm_setzero_si128(), \ + U, A, B, I); }) + + +#define _mm256_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vgf2p8affineinvqb_v32qi((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), \ + (char)(I)); }) + +#define _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ + (__v32qi)_mm256_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v32qi)(__m256i)(S)); }) + +#define _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m256i)_mm256_mask_gf2p8affineinv_epi64_epi8((__m256i)_mm256_setzero_si256(), \ + U, A, B, I); }) + + +#define _mm512_gf2p8affineinv_epi64_epi8(A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vgf2p8affineinvqb_v64qi((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), \ + (char)(I)); }) + +#define _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + (__v64qi)_mm512_gf2p8affineinv_epi64_epi8(A, B, I), \ + (__v64qi)(__m512i)(S)); }) + +#define _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m512i)_mm512_mask_gf2p8affineinv_epi64_epi8((__m512i)_mm512_setzero_qi(), \ + U, A, B, I); }) + +#define _mm_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_vgf2p8affineqb_v16qi((__v16qi)(__m128i)(A), \ + (__v16qi)(__m128i)(B), \ + (char)(I)); }) + +#define _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m128i)__builtin_ia32_selectb_128((__mmask16)(U), \ + (__v16qi)_mm_gf2p8affine_epi64_epi8(A, B, I), \ + (__v16qi)(__m128i)(S)); }) + + +#define _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m128i)_mm_mask_gf2p8affine_epi64_epi8((__m128i)_mm_setzero_si128(), \ + U, A, B, I); }) + + +#define _mm256_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_vgf2p8affineqb_v32qi((__v32qi)(__m256i)(A), \ + (__v32qi)(__m256i)(B), \ + (char)(I)); }) + +#define _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m256i)__builtin_ia32_selectb_256((__mmask32)(U), \ + (__v32qi)_mm256_gf2p8affine_epi64_epi8(A, B, I), \ + (__v32qi)(__m256i)(S)); }) + +#define _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m256i)_mm256_mask_gf2p8affine_epi64_epi8((__m256i)_mm256_setzero_si256(), \ + U, A, B, I); }) + + +#define _mm512_gf2p8affine_epi64_epi8(A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_vgf2p8affineqb_v64qi((__v64qi)(__m512i)(A), \ + (__v64qi)(__m512i)(B), \ + (char)(I)); }) + +#define _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, I) __extension__ ({ \ + (__m512i)__builtin_ia32_selectb_512((__mmask64)(U), \ + (__v64qi)_mm512_gf2p8affine_epi64_epi8(A, B, I), \ + (__v64qi)(__m512i)(S)); }) + +#define _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, I) __extension__ ({ \ + (__m512i)_mm512_mask_gf2p8affine_epi64_epi8((__m512i)_mm512_setzero_qi(), \ + U, A, B, I); }) + +/* Default attributes for simple form (no masking). */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("gfni"))) + +/* Default attributes for ZMM forms. */ +#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,gfni"))) + +/* Default attributes for VLX forms. */ +#define __DEFAULT_FN_ATTRS_VL __attribute__((__always_inline__, __nodebug__, __target__("avx512bw,avx512vl,gfni"))) + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm_gf2p8mul_epi8(__m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_vgf2p8mulb_v16qi((__v16qi) __A, + (__v16qi) __B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +_mm_mask_gf2p8mul_epi8(__m128i __S, __mmask16 __U, __m128i __A, __m128i __B) +{ + return (__m128i) __builtin_ia32_selectb_128(__U, + (__v16qi) _mm_gf2p8mul_epi8(__A, __B), + (__v16qi) __S); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS_VL +_mm_maskz_gf2p8mul_epi8(__mmask16 __U, __m128i __A, __m128i __B) +{ + return _mm_mask_gf2p8mul_epi8((__m128i)_mm_setzero_si128(), + __U, __A, __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_gf2p8mul_epi8(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_vgf2p8mulb_v32qi((__v32qi) __A, + (__v32qi) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +_mm256_mask_gf2p8mul_epi8(__m256i __S, __mmask32 __U, __m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_selectb_256(__U, + (__v32qi) _mm256_gf2p8mul_epi8(__A, __B), + (__v32qi) __S); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS_VL +_mm256_maskz_gf2p8mul_epi8(__mmask32 __U, __m256i __A, __m256i __B) +{ + return _mm256_mask_gf2p8mul_epi8((__m256i)_mm256_setzero_si256(), + __U, __A, __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F +_mm512_gf2p8mul_epi8(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_vgf2p8mulb_v64qi((__v64qi) __A, + (__v64qi) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F +_mm512_mask_gf2p8mul_epi8(__m512i __S, __mmask64 __U, __m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_selectb_512(__U, + (__v64qi) _mm512_gf2p8mul_epi8(__A, __B), + (__v64qi) __S); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F +_mm512_maskz_gf2p8mul_epi8(__mmask64 __U, __m512i __A, __m512i __B) +{ + return _mm512_mask_gf2p8mul_epi8((__m512i)_mm512_setzero_qi(), + __U, __A, __B); +} + +#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_F +#undef __DEFAULT_FN_ATTRS_VL + +#endif // __GFNIINTRIN_H + Index: lib/Headers/immintrin.h =================================================================== --- lib/Headers/immintrin.h +++ lib/Headers/immintrin.h @@ -212,6 +212,10 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__GFNI__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__) static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand16_step(unsigned short *__p) Index: test/CodeGen/attr-target-x86.c =================================================================== --- test/CodeGen/attr-target-x86.c +++ test/CodeGen/attr-target-x86.c @@ -38,7 +38,7 @@ // CHECK: lake{{.*}} #7 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" -// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-xop,-xsave,-xsaveopt" +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-xop,-xsave,-xsaveopt" // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" // CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-xop,-xsave,-xsaveopt" // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" Index: test/CodeGen/gfni-builtins.c =================================================================== --- test/CodeGen/gfni-builtins.c +++ test/CodeGen/gfni-builtins.c @@ -0,0 +1,182 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +gfni -emit-llvm -o - | FileCheck %s --check-prefix SSE +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX -target-feature +gfni -target-feature +avx -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX512 -target-feature +gfni -target-feature +avx512bw -target-feature +avx512vl -emit-llvm -o - | FileCheck %s --check-prefixes SSE,AVX,AVX512 + +#include + +__m128i test_mm_gf2p8affineinv_epi64_epi8(__m128i A, __m128i B) { + // SSE-LABEL: @test_mm_gf2p8affineinv_epi64_epi8 + // SSE: @llvm.x86.vgf2p8affineinvqb.128 + return _mm_gf2p8affineinv_epi64_epi8(A, B, 1); +} + +__m128i test_mm_gf2p8affine_epi64_epi8(__m128i A, __m128i B) { + // SSE-LABEL: @test_mm_gf2p8affine_epi64_epi8 + // SSE: @llvm.x86.vgf2p8affineqb.128 + return _mm_gf2p8affine_epi64_epi8(A, B, 1); +} + +__m128i test_mm_gf2p8mul_epi8(__m128i A, __m128i B) { + // SSE-LABEL: @test_mm_gf2p8mul_epi8 + // SSE: @llvm.x86.vgf2p8mulb.128 + return _mm_gf2p8mul_epi8(A, B); +} + +#if defined(AVX) || defined(AVX512) +__m256i test_mm256_gf2p8affineinv_epi64_epi8(__m256i A, __m256i B) { + // AVX-LABEL: @test_mm256_gf2p8affineinv_epi64_epi8 + // AVX: @llvm.x86.vgf2p8affineinvqb.256 + return _mm256_gf2p8affineinv_epi64_epi8(A, B, 1); +} + +__m256i test_mm256_gf2p8affine_epi64_epi8(__m256i A, __m256i B) { + // AVX-LABEL: @test_mm256_gf2p8affine_epi64_epi8 + // AVX: @llvm.x86.vgf2p8affineqb.256 + return _mm256_gf2p8affine_epi64_epi8(A, B, 1); +} + +__m256i test_mm256_gf2p8mul_epi8(__m256i A, __m256i B) { + // AVX-LABEL: @test_mm256_gf2p8mul_epi8 + // AVX: @llvm.x86.vgf2p8mulb.256 + return _mm256_gf2p8mul_epi8(A, B); +} +#endif // AVX + +#ifdef AVX512 +__m512i test_mm512_gf2p8affineinv_epi64_epi8(__m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_gf2p8affineinv_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineinvqb.512 + return _mm512_gf2p8affineinv_epi64_epi8(A, B, 1); +} + +__m512i test_mm512_mask_gf2p8affineinv_epi64_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_mask_gf2p8affineinv_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineinvqb.512 + // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1); +} + +__m512i test_mm512_maskz_gf2p8affineinv_epi64_epi8(__mmask64 U, __m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_maskz_gf2p8affineinv_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineinvqb.512 + // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1); +} + +__m256i test_mm256_mask_gf2p8affineinv_epi64_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) { + // AVX256-LABEL: @test_mm256_mask_gf2p8affineinv_epi64_epi8 + // AVX256: @llvm.x86.vgf2p8affineinvqb.256 + // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1); +} + +__m256i test_mm256_maskz_gf2p8affineinv_epi64_epi8(__mmask32 U, __m256i A, __m256i B) { + // AVX256-LABEL: @test_mm256_maskz_gf2p8affineinv_epi64_epi8 + // AVX256: @llvm.x86.vgf2p8affineinvqb.256 + // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1); +} + +__m128i test_mm_mask_gf2p8affineinv_epi64_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) { + // AVX512-LABEL: @test_mm_mask_gf2p8affineinv_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineinvqb.128 + // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm_mask_gf2p8affineinv_epi64_epi8(S, U, A, B, 1); +} + +__m128i test_mm_maskz_gf2p8affineinv_epi64_epi8(__mmask16 U, __m128i A, __m128i B) { + // AVX512-LABEL: @test_mm_maskz_gf2p8affineinv_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineinvqb.128 + // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm_maskz_gf2p8affineinv_epi64_epi8(U, A, B, 1); +} + +__m512i test_mm512_gf2p8affine_epi64_epi8(__m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_gf2p8affine_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineqb.512 + return _mm512_gf2p8affine_epi64_epi8(A, B, 1); +} + +__m512i test_mm512_mask_gf2p8affine_epi64_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_mask_gf2p8affine_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineqb.512 + // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1); +} + +__m512i test_mm512_maskz_gf2p8affine_epi64_epi8(__mmask64 U, __m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_maskz_gf2p8affine_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineqb.512 + // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_maskz_gf2p8affine_epi64_epi8(U, A, B, 1); +} + +__m256i test_mm256_mask_gf2p8affine_epi64_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) { + // AVX256-LABEL: @test_mm256_mask_gf2p8affine_epi64_epi8 + // AVX256: @llvm.x86.vgf2p8affineqb.256 + // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1); +} + +__m256i test_mm256_maskz_gf2p8affine_epi64_epi8(__mmask32 U, __m256i A, __m256i B) { + // AVX256-LABEL: @test_mm256_maskz_gf2p8affine_epi64_epi8 + // AVX256: @llvm.x86.vgf2p8affineqb.256 + // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_maskz_gf2p8affine_epi64_epi8(U, A, B, 1); +} + +__m128i test_mm_mask_gf2p8affine_epi64_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) { + // AVX512-LABEL: @test_mm_mask_gf2p8affine_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineqb.128 + // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm_mask_gf2p8affine_epi64_epi8(S, U, A, B, 1); +} + +__m128i test_mm_maskz_gf2p8affine_epi64_epi8(__mmask16 U, __m128i A, __m128i B) { + // AVX512-LABEL: @test_mm_maskz_gf2p8affine_epi64_epi8 + // AVX512: @llvm.x86.vgf2p8affineqb.128 + // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm_maskz_gf2p8affine_epi64_epi8(U, A, B, 1); +} + +__m512i test_mm512_gf2p8mul_epi8(__m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_gf2p8mul_epi8 + // AVX512: @llvm.x86.vgf2p8mulb.512 + return _mm512_gf2p8mul_epi8(A, B); +} + +__m512i test_mm512_mask_gf2p8mul_epi8(__m512i S, __mmask64 U, __m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_mask_gf2p8mul_epi8 + // AVX512: @llvm.x86.vgf2p8mulb.512 + // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_mask_gf2p8mul_epi8(S, U, A, B); +} + +__m512i test_mm512_maskz_gf2p8mul_epi8(__mmask64 U, __m512i A, __m512i B) { + // AVX512-LABEL: @test_mm512_maskz_gf2p8mul_epi8 + // AVX512: @llvm.x86.vgf2p8mulb.512 + // AVX512: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_maskz_gf2p8mul_epi8(U, A, B); +} + +__m256i test_mm256_mask_gf2p8mul_epi8(__m256i S, __mmask32 U, __m256i A, __m256i B) { + // AVX256-LABEL: @test_mm256_mask_gf2p8mul_epi8 + // AVX256: @llvm.x86.vgf2p8mulb.256 + // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_mask_gf2p8mul_epi8(S, U, A, B); +} + +__m256i test_mm256_maskz_gf2p8mul_epi8(__mmask32 U, __m256i A, __m256i B) { + // AVX256-LABEL: @test_mm256_maskz_gf2p8mul_epi8 + // AVX256: @llvm.x86.vgf2p8mulb.256 + // AVX256: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_maskz_gf2p8mul_epi8(U, A, B); +} + +__m128i test_mm_mask_gf2p8mul_epi8(__m128i S, __mmask16 U, __m128i A, __m128i B) { + // AVX512-LABEL: @test_mm_mask_gf2p8mul_epi8 + // AVX512: @llvm.x86.vgf2p8mulb.128 + // AVX512: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm_mask_gf2p8mul_epi8(S, U, A, B); +} +#endif // AVX512 Index: test/Driver/x86-target-features.c =================================================================== --- test/Driver/x86-target-features.c +++ test/Driver/x86-target-features.c @@ -100,3 +100,8 @@ // VAES: "-target-feature" "+vaes" // NO-VAES: "-target-feature" "-vaes" +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mgfni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=GFNI %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-gfni %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-GFNI %s +// GFNI: "-target-feature" "+gfni" +// NO-GFNI: "-target-feature" "-gfni + Index: test/Preprocessor/predefined-arch-macros.c =================================================================== --- test/Preprocessor/predefined-arch-macros.c +++ test/Preprocessor/predefined-arch-macros.c @@ -1063,6 +1063,7 @@ // CHECK_ICL_M32: #define __CLFLUSHOPT__ 1 // CHECK_ICL_M32: #define __F16C__ 1 // CHECK_ICL_M32: #define __FMA__ 1 +// CHECK_ICL_M32: #define __GFNI__ 1 // CHECK_ICL_M32: #define __LZCNT__ 1 // CHECK_ICL_M32: #define __MMX__ 1 // CHECK_ICL_M32: #define __MPX__ 1 @@ -1109,6 +1110,7 @@ // CHECK_ICL_M64: #define __CLFLUSHOPT__ 1 // CHECK_ICL_M64: #define __F16C__ 1 // CHECK_ICL_M64: #define __FMA__ 1 +// CHECK_ICL_M64: #define __GFNI__ 1 // CHECK_ICL_M64: #define __LZCNT__ 1 // CHECK_ICL_M64: #define __MMX__ 1 // CHECK_ICL_M64: #define __MPX__ 1 Index: test/Preprocessor/x86_target_features.c =================================================================== --- test/Preprocessor/x86_target_features.c +++ test/Preprocessor/x86_target_features.c @@ -379,3 +379,8 @@ // VAESNOAES-NOT: #define __AES__ 1 // VAESNOAES-NOT: #define __VAES__ 1 +// RUN: %clang -target i386-unknown-unknown -march=atom -mgfni -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=GFNI %s + +// GFNI: #define __GFNI__ 1 +// GFNI: #define __SSE2__ 1 +