Index: include/clang/Basic/BuiltinsX86.def =================================================================== --- include/clang/Basic/BuiltinsX86.def +++ include/clang/Basic/BuiltinsX86.def @@ -429,6 +429,16 @@ TARGET_BUILTIN(__builtin_ia32_aesimc128, "V2LLiV2LLi", "", "aes") TARGET_BUILTIN(__builtin_ia32_aeskeygenassist128, "V2LLiV2LLiIc", "", "aes") +// VAES +TARGET_BUILTIN(__builtin_ia32_aesenc256, "V4LLiV4LLiV4LLi", "", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesenc512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesenclast256, "V4LLiV4LLiV4LLi", "", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesenclast512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesdec256, "V4LLiV4LLiV4LLi", "", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesdec512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes") +TARGET_BUILTIN(__builtin_ia32_aesdeclast256, "V4LLiV4LLiV4LLi", "", "vaes") +TARGET_BUILTIN(__builtin_ia32_aesdeclast512, "V8LLiV8LLiV8LLi", "", "avx512f,vaes") + // CLMUL TARGET_BUILTIN(__builtin_ia32_pclmulqdq128, "V2LLiV2LLiV2LLiIc", "", "pclmul") Index: include/clang/Driver/Options.td =================================================================== --- include/clang/Driver/Options.td +++ include/clang/Driver/Options.td @@ -2543,6 +2543,8 @@ def mno_sha : Flag<["-"], "mno-sha">, Group; def mtbm : Flag<["-"], "mtbm">, Group; def mno_tbm : Flag<["-"], "mno-tbm">, Group; +def mvaes : Flag<["-"], "mvaes">, Group; +def mno_vaes : Flag<["-"], "mno-vaes">, Group; def mxop : Flag<["-"], "mxop">, Group; def mno_xop : Flag<["-"], "mno-xop">, Group; def mxsave : Flag<["-"], "mxsave">, Group; Index: lib/Basic/Targets/X86.h =================================================================== --- lib/Basic/Targets/X86.h +++ lib/Basic/Targets/X86.h @@ -48,6 +48,7 @@ enum XOPEnum { NoXOP, SSE4A, FMA4, XOP } XOPLevel = NoXOP; bool HasAES = false; + bool HasVAES = false; bool HasPCLMUL = false; bool HasLZCNT = false; bool HasRDRND = false; Index: lib/Basic/Targets/X86.cpp =================================================================== --- lib/Basic/Targets/X86.cpp +++ lib/Basic/Targets/X86.cpp @@ -132,6 +132,7 @@ break; case CK_Icelake: + setFeatureEnabledImpl(Features, "vaes", true); // TODO: Add icelake features here. LLVM_FALLTHROUGH; case CK_Cannonlake: @@ -460,7 +461,7 @@ LLVM_FALLTHROUGH; case AVX: Features["fma"] = Features["avx"] = Features["f16c"] = Features["xsave"] = - Features["xsaveopt"] = false; + Features["xsaveopt"] = Features["vaes"] = false; setXOPLevel(Features, FMA4, false); LLVM_FALLTHROUGH; case AVX2: @@ -572,6 +573,13 @@ } else if (Name == "aes") { if (Enabled) setSSELevel(Features, SSE2, Enabled); + else + Features["vaes"] = false; + } else if (Name == "vaes") { + if (Enabled) { + setSSELevel(Features, AVX, Enabled); + Features["aes"] = true; + } } else if (Name == "pclmul") { if (Enabled) setSSELevel(Features, SSE2, Enabled); @@ -636,6 +644,8 @@ if (Feature == "+aes") { HasAES = true; + } else if (Feature == "+vaes") { + HasVAES = true; } else if (Feature == "+pclmul") { HasPCLMUL = true; } else if (Feature == "+lzcnt") { @@ -934,6 +944,9 @@ if (HasAES) Builder.defineMacro("__AES__"); + if (HasVAES) + Builder.defineMacro("__VAES__"); + if (HasPCLMUL) Builder.defineMacro("__PCLMUL__"); @@ -1185,6 +1198,7 @@ .Case("sse4.2", true) .Case("sse4a", true) .Case("tbm", true) + .Case("vaes", true) .Case("x87", true) .Case("xop", true) .Case("xsave", true) @@ -1249,6 +1263,7 @@ .Case("sse4.2", SSELevel >= SSE42) .Case("sse4a", XOPLevel >= SSE4A) .Case("tbm", HasTBM) + .Case("vaes", HasVAES) .Case("x86", true) .Case("x86_32", getTriple().getArch() == llvm::Triple::x86) .Case("x86_64", getTriple().getArch() == llvm::Triple::x86_64) Index: lib/Headers/CMakeLists.txt =================================================================== --- lib/Headers/CMakeLists.txt +++ lib/Headers/CMakeLists.txt @@ -82,6 +82,7 @@ tmmintrin.h unwind.h vadefs.h + vaesintrin.h varargs.h vecintrin.h wmmintrin.h Index: lib/Headers/immintrin.h =================================================================== --- lib/Headers/immintrin.h +++ lib/Headers/immintrin.h @@ -208,6 +208,10 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__VAES__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__RDRND__) static __inline__ int __attribute__((__always_inline__, __nodebug__, __target__("rdrnd"))) _rdrand16_step(unsigned short *__p) Index: lib/Headers/vaesintrin.h =================================================================== --- lib/Headers/vaesintrin.h +++ lib/Headers/vaesintrin.h @@ -0,0 +1,98 @@ +/*===------------------ vaesintrin.h - VAES intrinsics ---------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __VAESINTRIN_H +#define __VAESINTRIN_H + +/* Default attributes for YMM forms. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("vaes"))) + +/* Default attributes for ZMM forms. */ +#define __DEFAULT_FN_ATTRS_F __attribute__((__always_inline__, __nodebug__, __target__("avx512f,vaes"))) + + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesenc_epi128(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_aesenc256((__v4di) __A, + (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesenc_epi128(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_aesenc512((__v8di) __A, + (__v8di) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesdec_epi128(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_aesdec256((__v4di) __A, + (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesdec_epi128(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_aesdec512((__v8di) __A, + (__v8di) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesenclast_epi128(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_aesenclast256((__v4di) __A, + (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesenclast_epi128(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_aesenclast512((__v8di) __A, + (__v8di) __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS + _mm256_aesdeclast_epi128(__m256i __A, __m256i __B) +{ + return (__m256i) __builtin_ia32_aesdeclast256((__v4di) __A, + (__v4di) __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS_F + _mm512_aesdeclast_epi128(__m512i __A, __m512i __B) +{ + return (__m512i) __builtin_ia32_aesdeclast512((__v8di) __A, + (__v8di) __B); +} + + +#undef __DEFAULT_FN_ATTRS +#undef __DEFAULT_FN_ATTRS_F + +#endif Index: test/CodeGen/attr-target-x86.c =================================================================== --- test/CodeGen/attr-target-x86.c +++ test/CodeGen/attr-target-x86.c @@ -38,9 +38,9 @@ // CHECK: lake{{.*}} #7 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" -// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt" +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-xop,-xsave,-xsaveopt" // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" -// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-xop,-xsave,-xsaveopt" -// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes" +// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-xop,-xsave,-xsaveopt" +// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx" // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx" Index: test/CodeGen/vaes-builtins.c =================================================================== --- test/CodeGen/vaes-builtins.c +++ test/CodeGen/vaes-builtins.c @@ -0,0 +1,55 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +vaes -emit-llvm -o - | FileCheck %s --check-prefix AVX +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -DAVX512 -target-feature +vaes -target-feature +avx512f -emit-llvm -o - | FileCheck %s --check-prefixes AVX,AVX512 + +#include + +__m256i test_mm256_aesenc_epi128(__m256i __A, __m256i __B) { + // AVX-LABEL: @test_mm256_aesenc_epi128 + // AVX: @llvm.x86.aesni.aesenc.256 + return _mm256_aesenc_epi128(__A, __B); +} + +__m256i test_mm256_aesenclast_epi128(__m256i __A, __m256i __B) { + // AVX-LABEL: @test_mm256_aesenclast_epi128 + // AVX: @llvm.x86.aesni.aesenclast.256 + return _mm256_aesenclast_epi128(__A, __B); +} + +__m256i test_mm256_aesdec_epi128(__m256i __A, __m256i __B) { + // AVX-LABEL: @test_mm256_aesdec_epi128 + // AVX: @llvm.x86.aesni.aesdec.256 + return _mm256_aesdec_epi128(__A, __B); +} + +__m256i test_mm256_aesdeclast_epi128(__m256i __A, __m256i __B) { + // AVX-LABEL: @test_mm256_aesdeclast_epi128 + // AVX: @llvm.x86.aesni.aesdeclast.256 + return _mm256_aesdeclast_epi128(__A, __B); +} + +#ifdef AVX512 +__m512i test_mm512_aesenc_epi128(__m512i __A, __m512i __B) { + // AVX512-LABEL: @test_mm512_aesenc_epi128 + // AVX512: @llvm.x86.aesni.aesenc.512 + return _mm512_aesenc_epi128(__A, __B); +} + +__m512i test_mm512_aesenclast_epi128(__m512i __A, __m512i __B) { + // AVX512-LABEL: @test_mm512_aesenclast_epi128 + // AVX512: @llvm.x86.aesni.aesenclast.512 + return _mm512_aesenclast_epi128(__A, __B); +} + +__m512i test_mm512_aesdec_epi128(__m512i __A, __m512i __B) { + // AVX512-LABEL: @test_mm512_aesdec_epi128 + // AVX512: @llvm.x86.aesni.aesdec.512 + return _mm512_aesdec_epi128(__A, __B); +} + +__m512i test_mm512_aesdeclast_epi128(__m512i __A, __m512i __B) { + // AVX512-LABEL: @test_mm512_aesdeclast_epi128 + // AVX512: @llvm.x86.aesni.aesdeclast.512 + return _mm512_aesdeclast_epi128(__A, __B); +} +#endif + Index: test/Driver/x86-target-features.c =================================================================== --- test/Driver/x86-target-features.c +++ test/Driver/x86-target-features.c @@ -94,3 +94,9 @@ // RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-clzero %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-CLZERO %s // CLZERO: "-target-feature" "+clzero" // NO-CLZERO: "-target-feature" "-clzero" + +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mvaes %s -### -o %t.o 2>&1 | FileCheck -check-prefix=VAES %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-vaes %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-VAES %s +// VAES: "-target-feature" "+vaes" +// NO-VAES: "-target-feature" "-vaes" + Index: test/Preprocessor/predefined-arch-macros.c =================================================================== --- test/Preprocessor/predefined-arch-macros.c +++ test/Preprocessor/predefined-arch-macros.c @@ -1079,6 +1079,7 @@ // CHECK_ICL_M32: #define __SSE4_2__ 1 // CHECK_ICL_M32: #define __SSE__ 1 // CHECK_ICL_M32: #define __SSSE3__ 1 +// CHECK_ICL_M32: #define __VAES__ 1 // CHECK_ICL_M32: #define __XSAVEC__ 1 // CHECK_ICL_M32: #define __XSAVEOPT__ 1 // CHECK_ICL_M32: #define __XSAVES__ 1 @@ -1124,6 +1125,7 @@ // CHECK_ICL_M64: #define __SSE4_2__ 1 // CHECK_ICL_M64: #define __SSE__ 1 // CHECK_ICL_M64: #define __SSSE3__ 1 +// CHECK_ICL_M64: #define __VAES__ 1 // CHECK_ICL_M64: #define __XSAVEC__ 1 // CHECK_ICL_M64: #define __XSAVEOPT__ 1 // CHECK_ICL_M64: #define __XSAVES__ 1 Index: test/Preprocessor/x86_target_features.c =================================================================== --- test/Preprocessor/x86_target_features.c +++ test/Preprocessor/x86_target_features.c @@ -368,3 +368,14 @@ // RUN: %clang -target i386-unknown-unknown -march=atom -mclflushopt -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=CLFLUSHOPT %s // CLFLUSHOPT: #define __CLFLUSHOPT__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mvaes -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VAES %s + +// VAES: #define __AES__ 1 +// VAES: #define __VAES__ 1 + +// RUN: %clang -target i386-unknown-unknown -march=atom -mvaes -mno-aes -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=VAESNOAES %s + +// VAESNOAES-NOT: #define __AES__ 1 +// VAESNOAES-NOT: #define __VAES__ 1 +