Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def =================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86.def +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def @@ -1092,6 +1092,17 @@ TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq") TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq") +TARGET_BUILTIN(__builtin_ia32_vpopcntb_128, "V16cV16c", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_128, "V8sV8s", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntb_256, "V32cV32c", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_256, "V16sV16s", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntb_512, "V64cV64c", "", "avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpopcntw_512, "V32sV32s", "", "avx512bitalg") + +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb128_mask, "UsV16cV16cUs", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb256_mask, "UiV32cV32cUi", "", "avx512vl,avx512bitalg") +TARGET_BUILTIN(__builtin_ia32_vpshufbitqmb512_mask, "ULLiV64cV64cULLi", "", "avx512bitalg") + TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw") Index: cfe/trunk/include/clang/Driver/Options.td =================================================================== --- cfe/trunk/include/clang/Driver/Options.td +++ cfe/trunk/include/clang/Driver/Options.td @@ -2467,6 +2467,8 @@ def mno_avx2 : Flag<["-"], "mno-avx2">, Group; def mavx512f : Flag<["-"], "mavx512f">, Group; def mno_avx512f : Flag<["-"], "mno-avx512f">, Group; +def mavx512bitalg : Flag<["-"], "mavx512bitalg">, Group; +def mno_avx512bitalg : Flag<["-"], "mno-avx512bitalg">, Group; def mavx512bw : Flag<["-"], "mavx512bw">, Group; def mno_avx512bw : Flag<["-"], "mno-avx512bw">, Group; def mavx512cd : Flag<["-"], "mavx512cd">, Group; Index: cfe/trunk/lib/Basic/Targets/X86.h =================================================================== --- cfe/trunk/lib/Basic/Targets/X86.h +++ cfe/trunk/lib/Basic/Targets/X86.h @@ -71,6 +71,7 @@ bool HasAVX512ER = false; bool HasAVX512PF = false; bool HasAVX512DQ = false; + bool HasAVX512BITALG = false; bool HasAVX512BW = false; bool HasAVX512VL = false; bool HasAVX512VBMI = false; Index: cfe/trunk/lib/Basic/Targets/X86.cpp =================================================================== --- cfe/trunk/lib/Basic/Targets/X86.cpp +++ cfe/trunk/lib/Basic/Targets/X86.cpp @@ -135,6 +135,7 @@ setFeatureEnabledImpl(Features, "vaes", true); setFeatureEnabledImpl(Features, "gfni", true); setFeatureEnabledImpl(Features, "vpclmulqdq", true); + setFeatureEnabledImpl(Features, "avx512bitalg", true); // TODO: Add icelake features here. LLVM_FALLTHROUGH; case CK_Cannonlake: @@ -473,7 +474,8 @@ Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] = Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] = Features["avx512vl"] = Features["avx512vbmi"] = - Features["avx512ifma"] = Features["avx512vpopcntdq"] = false; + Features["avx512ifma"] = Features["avx512vpopcntdq"] = + Features["avx512bitalg"] = false; break; } } @@ -604,15 +606,15 @@ } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" || Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" || Name == "avx512vbmi" || Name == "avx512ifma" || - Name == "avx512vpopcntdq") { + Name == "avx512vpopcntdq" || Name == "avx512bitalg") { if (Enabled) setSSELevel(Features, AVX512F, Enabled); - // Enable BWI instruction if VBMI is being enabled. - if (Name == "avx512vbmi" && Enabled) + // Enable BWI instruction if VBMI / BITALG is being enabled. + if ((Name == "avx512vbmi" || Name == "avx512bitalg") && Enabled) Features["avx512bw"] = true; - // Also disable VBMI if BWI is being disabled. + // Also disable VBMI / BITALG if BWI is being disabled. if (Name == "avx512bw" && !Enabled) - Features["avx512vbmi"] = false; + Features["avx512vbmi"] = Features["avx512bitalg"] = false; } else if (Name == "fma") { if (Enabled) setSSELevel(Features, AVX, Enabled); @@ -702,6 +704,8 @@ HasAVX512PF = true; } else if (Feature == "+avx512dq") { HasAVX512DQ = true; + } else if (Feature == "+avx512bitalg") { + HasAVX512BITALG = true; } else if (Feature == "+avx512bw") { HasAVX512BW = true; } else if (Feature == "+avx512vl") { @@ -1041,6 +1045,8 @@ Builder.defineMacro("__AVX512PF__"); if (HasAVX512DQ) Builder.defineMacro("__AVX512DQ__"); + if (HasAVX512BITALG) + Builder.defineMacro("__AVX512BITALG__"); if (HasAVX512BW) Builder.defineMacro("__AVX512BW__"); if (HasAVX512VL) @@ -1179,6 +1185,7 @@ .Case("avx512er", true) .Case("avx512pf", true) .Case("avx512dq", true) + .Case("avx512bitalg", true) .Case("avx512bw", true) .Case("avx512vl", true) .Case("avx512vbmi", true) @@ -1244,6 +1251,7 @@ .Case("avx512er", HasAVX512ER) .Case("avx512pf", HasAVX512PF) .Case("avx512dq", HasAVX512DQ) + .Case("avx512bitalg", HasAVX512BITALG) .Case("avx512bw", HasAVX512BW) .Case("avx512vl", HasAVX512VL) .Case("avx512vbmi", HasAVX512VBMI) Index: cfe/trunk/lib/CodeGen/CGBuiltin.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp @@ -8143,12 +8143,18 @@ case X86::BI__builtin_ia32_storesd128_mask: { return EmitX86MaskedStore(*this, Ops, 16); } + case X86::BI__builtin_ia32_vpopcntb_128: case X86::BI__builtin_ia32_vpopcntd_128: case X86::BI__builtin_ia32_vpopcntq_128: + case X86::BI__builtin_ia32_vpopcntw_128: + case X86::BI__builtin_ia32_vpopcntb_256: case X86::BI__builtin_ia32_vpopcntd_256: case X86::BI__builtin_ia32_vpopcntq_256: + case X86::BI__builtin_ia32_vpopcntw_256: + case X86::BI__builtin_ia32_vpopcntb_512: case X86::BI__builtin_ia32_vpopcntd_512: - case X86::BI__builtin_ia32_vpopcntq_512: { + case X86::BI__builtin_ia32_vpopcntq_512: + case X86::BI__builtin_ia32_vpopcntw_512: { llvm::Type *ResultType = ConvertType(E->getType()); llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); return Builder.CreateCall(F, Ops); Index: cfe/trunk/lib/Headers/CMakeLists.txt =================================================================== --- cfe/trunk/lib/Headers/CMakeLists.txt +++ cfe/trunk/lib/Headers/CMakeLists.txt @@ -7,6 +7,8 @@ arm64intr.h avx2intrin.h avx512bwintrin.h + avx512bitalgintrin.h + avx512vlbitalgintrin.h avx512cdintrin.h avx512vpopcntdqintrin.h avx512dqintrin.h Index: cfe/trunk/lib/Headers/avx512bitalgintrin.h =================================================================== --- cfe/trunk/lib/Headers/avx512bitalgintrin.h +++ cfe/trunk/lib/Headers/avx512bitalgintrin.h @@ -0,0 +1,97 @@ +/*===------------- avx512bitalgintrin.h - BITALG intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __AVX512BITALGINTRIN_H +#define __AVX512BITALGINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512bitalg"))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_popcnt_epi16(__m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcntw_512((__v32hi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) +{ + return (__m512i) __builtin_ia32_selectw_512((__mmask32) __U, + (__v32hi) _mm512_popcnt_epi16(__B), + (__v32hi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) +{ + return _mm512_mask_popcnt_epi16((__m512i) _mm512_setzero_hi(), + __U, + __B); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_popcnt_epi8(__m512i __A) +{ + return (__m512i) __builtin_ia32_vpopcntb_512((__v64qi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) +{ + return (__m512i) __builtin_ia32_selectb_512((__mmask64) __U, + (__v64qi) _mm512_popcnt_epi8(__B), + (__v64qi) __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) +{ + return _mm512_mask_popcnt_epi8((__m512i) _mm512_setzero_qi(), + __U, + __B); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B) +{ + return (__mmask64) __builtin_ia32_vpshufbitqmb512_mask((__v64qi) __A, + (__v64qi) __B, + __U); +} + +static __inline__ __mmask64 __DEFAULT_FN_ATTRS +_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) +{ + return _mm512_mask_bitshuffle_epi64_mask((__mmask64) -1, + __A, + __B); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif Index: cfe/trunk/lib/Headers/avx512vlbitalgintrin.h =================================================================== --- cfe/trunk/lib/Headers/avx512vlbitalgintrin.h +++ cfe/trunk/lib/Headers/avx512vlbitalgintrin.h @@ -0,0 +1,157 @@ +/*===------------- avx512vlbitalgintrin.h - BITALG intrinsics ------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error "Never use directly; include instead." +#endif + +#ifndef __AVX512VLBITALGINTRIN_H +#define __AVX512VLBITALGINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__, __target__("avx512vl,avx512bitalg"))) + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_popcnt_epi16(__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcntw_256((__v16hi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_selectw_256((__mmask16) __U, + (__v16hi) _mm256_popcnt_epi16(__B), + (__v16hi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) +{ + return _mm256_mask_popcnt_epi16((__m256i) _mm256_setzero_si256(), + __U, + __B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_popcnt_epi16(__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcntw_128((__v8hi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_selectw_128((__mmask8) __U, + (__v8hi) _mm128_popcnt_epi16(__B), + (__v8hi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) +{ + return _mm128_mask_popcnt_epi16((__m128i) _mm_setzero_si128(), + __U, + __B); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_popcnt_epi8(__m256i __A) +{ + return (__m256i) __builtin_ia32_vpopcntb_256((__v32qi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) +{ + return (__m256i) __builtin_ia32_selectb_256((__mmask32) __U, + (__v32qi) _mm256_popcnt_epi8(__B), + (__v32qi) __A); +} + +static __inline__ __m256i __DEFAULT_FN_ATTRS +_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) +{ + return _mm256_mask_popcnt_epi8((__m256i) _mm256_setzero_si256(), + __U, + __B); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_popcnt_epi8(__m128i __A) +{ + return (__m128i) __builtin_ia32_vpopcntb_128((__v16qi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) +{ + return (__m128i) __builtin_ia32_selectb_128((__mmask16) __U, + (__v16qi) _mm128_popcnt_epi8(__B), + (__v16qi) __A); +} + +static __inline__ __m128i __DEFAULT_FN_ATTRS +_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) +{ + return _mm128_mask_popcnt_epi8((__m128i) _mm_setzero_si128(), + __U, + __B); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) +{ + return (__mmask32) __builtin_ia32_vpshufbitqmb256_mask((__v32qi) __A, + (__v32qi) __B, + __U); +} + +static __inline__ __mmask32 __DEFAULT_FN_ATTRS +_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B) +{ + return _mm256_mask_bitshuffle_epi32_mask((__mmask32) -1, + __A, + __B); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) +{ + return (__mmask16) __builtin_ia32_vpshufbitqmb128_mask((__v16qi) __A, + (__v16qi) __B, + __U); +} + +static __inline__ __mmask16 __DEFAULT_FN_ATTRS +_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B) +{ + return _mm128_mask_bitshuffle_epi16_mask((__mmask16) -1, + __A, + __B); +} + + +#undef __DEFAULT_FN_ATTRS + +#endif Index: cfe/trunk/lib/Headers/immintrin.h =================================================================== --- cfe/trunk/lib/Headers/immintrin.h +++ cfe/trunk/lib/Headers/immintrin.h @@ -150,6 +150,10 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512BITALG__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512CD__) #include #endif @@ -168,6 +172,11 @@ #endif #if !defined(_MSC_VER) || __has_feature(modules) || \ + (defined(__AVX512VL__) && defined(__AVX512BITALG__)) +#include +#endif + +#if !defined(_MSC_VER) || __has_feature(modules) || \ (defined(__AVX512VL__) && defined(__AVX512BW__)) #include #endif Index: cfe/trunk/test/CodeGen/attr-target-x86.c =================================================================== --- cfe/trunk/test/CodeGen/attr-target-x86.c +++ cfe/trunk/test/CodeGen/attr-target-x86.c @@ -38,9 +38,9 @@ // CHECK: lake{{.*}} #7 // CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+x87" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" -// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" +// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-aes,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-gfni,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" // CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" -// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" +// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-avx,-avx2,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop,-xsave,-xsaveopt" // CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes" // CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+x87,-3dnow,-3dnowa,-mmx" // CHECK: #7 = {{.*}}"target-cpu"="lakemont" "target-features"="+mmx" Index: cfe/trunk/test/CodeGen/avx512bitalg-builtins.c =================================================================== --- cfe/trunk/test/CodeGen/avx512bitalg-builtins.c +++ cfe/trunk/test/CodeGen/avx512bitalg-builtins.c @@ -0,0 +1,54 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include + +__m512i test_mm512_popcnt_epi16(__m512i __A) { + // CHECK-LABEL: @test_mm512_popcnt_epi16 + // CHECK: @llvm.ctpop.v32i16 + return _mm512_popcnt_epi16(__A); +} + +__m512i test_mm512_mask_popcnt_epi16(__m512i __A, __mmask32 __U, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_popcnt_epi16 + // CHECK: @llvm.ctpop.v32i16 + // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}} + return _mm512_mask_popcnt_epi16(__A, __U, __B); +} +__m512i test_mm512_maskz_popcnt_epi16(__mmask32 __U, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_popcnt_epi16 + // CHECK: @llvm.ctpop.v32i16 + // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i16> %{{[0-9]+}}, <32 x i16> {{.*}} + return _mm512_maskz_popcnt_epi16(__U, __B); +} + +__m512i test_mm512_popcnt_epi8(__m512i __A) { + // CHECK-LABEL: @test_mm512_popcnt_epi8 + // CHECK: @llvm.ctpop.v64i8 + return _mm512_popcnt_epi8(__A); +} + +__m512i test_mm512_mask_popcnt_epi8(__m512i __A, __mmask64 __U, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_popcnt_epi8 + // CHECK: @llvm.ctpop.v64i8 + // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_mask_popcnt_epi8(__A, __U, __B); +} +__m512i test_mm512_maskz_popcnt_epi8(__mmask64 __U, __m512i __B) { + // CHECK-LABEL: @test_mm512_maskz_popcnt_epi8 + // CHECK: @llvm.ctpop.v64i8 + // CHECK: select <64 x i1> %{{[0-9]+}}, <64 x i8> %{{[0-9]+}}, <64 x i8> {{.*}} + return _mm512_maskz_popcnt_epi8(__U, __B); +} + +__mmask64 test_mm512_mask_bitshuffle_epi64_mask(__mmask64 __U, __m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_mask_bitshuffle_epi64_mask + // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.512 + return _mm512_mask_bitshuffle_epi64_mask(__U, __A, __B); +} + +__mmask64 test_mm512_bitshuffle_epi64_mask(__m512i __A, __m512i __B) { + // CHECK-LABEL: @test_mm512_bitshuffle_epi64_mask + // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.512 + return _mm512_bitshuffle_epi64_mask(__A, __B); +} + Index: cfe/trunk/test/CodeGen/avx512vlbitalg-builtins.c =================================================================== --- cfe/trunk/test/CodeGen/avx512vlbitalg-builtins.c +++ cfe/trunk/test/CodeGen/avx512vlbitalg-builtins.c @@ -0,0 +1,104 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512bitalg -target-feature +avx512vl -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include + +__m256i test_mm256_popcnt_epi16(__m256i __A) { + // CHECK-LABEL: @test_mm256_popcnt_epi16 + // CHECK: @llvm.ctpop.v16i16 + return _mm256_popcnt_epi16(__A); +} + +__m256i test_mm256_mask_popcnt_epi16(__m256i __A, __mmask16 __U, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_popcnt_epi16 + // CHECK: @llvm.ctpop.v16i16 + // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}}, <16 x i16> {{.*}} + return _mm256_mask_popcnt_epi16(__A, __U, __B); +} +__m256i test_mm256_maskz_popcnt_epi16(__mmask16 __U, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_popcnt_epi16 + // CHECK: @llvm.ctpop.v16i16 + // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i16> %{{[0-9]+}}, <16 x i16> {{.*}} + return _mm256_maskz_popcnt_epi16(__U, __B); +} + +__m128i test_mm128_popcnt_epi16(__m128i __A) { + // CHECK-LABEL: @test_mm128_popcnt_epi16 + // CHECK: @llvm.ctpop.v8i16 + return _mm128_popcnt_epi16(__A); +} + +__m128i test_mm128_mask_popcnt_epi16(__m128i __A, __mmask8 __U, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_popcnt_epi16 + // CHECK: @llvm.ctpop.v8i16 + // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i16> {{.*}} + return _mm128_mask_popcnt_epi16(__A, __U, __B); +} +__m128i test_mm128_maskz_popcnt_epi16(__mmask8 __U, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_popcnt_epi16 + // CHECK: @llvm.ctpop.v8i16 + // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i16> %{{[0-9]+}}, <8 x i16> {{.*}} + return _mm128_maskz_popcnt_epi16(__U, __B); +} + +__m256i test_mm256_popcnt_epi8(__m256i __A) { + // CHECK-LABEL: @test_mm256_popcnt_epi8 + // CHECK: @llvm.ctpop.v32i8 + return _mm256_popcnt_epi8(__A); +} + +__m256i test_mm256_mask_popcnt_epi8(__m256i __A, __mmask32 __U, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_popcnt_epi8 + // CHECK: @llvm.ctpop.v32i8 + // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_mask_popcnt_epi8(__A, __U, __B); +} +__m256i test_mm256_maskz_popcnt_epi8(__mmask32 __U, __m256i __B) { + // CHECK-LABEL: @test_mm256_maskz_popcnt_epi8 + // CHECK: @llvm.ctpop.v32i8 + // CHECK: select <32 x i1> %{{[0-9]+}}, <32 x i8> %{{[0-9]+}}, <32 x i8> {{.*}} + return _mm256_maskz_popcnt_epi8(__U, __B); +} + +__m128i test_mm128_popcnt_epi8(__m128i __A) { + // CHECK-LABEL: @test_mm128_popcnt_epi8 + // CHECK: @llvm.ctpop.v16i8 + return _mm128_popcnt_epi8(__A); +} + +__m128i test_mm128_mask_popcnt_epi8(__m128i __A, __mmask16 __U, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_popcnt_epi8 + // CHECK: @llvm.ctpop.v16i8 + // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm128_mask_popcnt_epi8(__A, __U, __B); +} +__m128i test_mm128_maskz_popcnt_epi8(__mmask16 __U, __m128i __B) { + // CHECK-LABEL: @test_mm128_maskz_popcnt_epi8 + // CHECK: @llvm.ctpop.v16i8 + // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i8> %{{[0-9]+}}, <16 x i8> {{.*}} + return _mm128_maskz_popcnt_epi8(__U, __B); +} + +__mmask32 test_mm256_mask_bitshuffle_epi32_mask(__mmask32 __U, __m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_mask_bitshuffle_epi32_mask + // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.256 + return _mm256_mask_bitshuffle_epi32_mask(__U, __A, __B); +} + +__mmask32 test_mm256_bitshuffle_epi32_mask(__m256i __A, __m256i __B) { + // CHECK-LABEL: @test_mm256_bitshuffle_epi32_mask + // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.256 + return _mm256_bitshuffle_epi32_mask(__A, __B); +} + +__mmask16 test_mm128_mask_bitshuffle_epi16_mask(__mmask16 __U, __m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_mask_bitshuffle_epi16_mask + // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.128 + return _mm128_mask_bitshuffle_epi16_mask(__U, __A, __B); +} + +__mmask16 test_mm128_bitshuffle_epi16_mask(__m128i __A, __m128i __B) { + // CHECK-LABEL: @test_mm128_bitshuffle_epi16_mask + // CHECK: @llvm.x86.avx512.mask.vpshufbitqmb.128 + return _mm128_bitshuffle_epi16_mask(__A, __B); +} + Index: cfe/trunk/test/Driver/x86-target-features.c =================================================================== --- cfe/trunk/test/Driver/x86-target-features.c +++ cfe/trunk/test/Driver/x86-target-features.c @@ -110,3 +110,8 @@ // VPCLMULQDQ: "-target-feature" "+vpclmulqdq" // NO-VPCLMULQDQ: "-target-feature" "-vpclmulqdq" +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mavx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=BITALG %s +// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mno-avx512bitalg %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-BITALG %s +// BITALG: "-target-feature" "+avx512bitalg" +// NO-BITALG: "-target-feature" "-avx512bitalg" + Index: cfe/trunk/test/Preprocessor/predefined-arch-macros.c =================================================================== --- cfe/trunk/test/Preprocessor/predefined-arch-macros.c +++ cfe/trunk/test/Preprocessor/predefined-arch-macros.c @@ -1050,6 +1050,7 @@ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M32 // CHECK_ICL_M32: #define __AES__ 1 // CHECK_ICL_M32: #define __AVX2__ 1 +// CHECK_ICL_M32: #define __AVX512BITALG__ 1 // CHECK_ICL_M32: #define __AVX512BW__ 1 // CHECK_ICL_M32: #define __AVX512CD__ 1 // CHECK_ICL_M32: #define __AVX512DQ__ 1 @@ -1098,6 +1099,7 @@ // RUN: | FileCheck -match-full-lines %s -check-prefix=CHECK_ICL_M64 // CHECK_ICL_M64: #define __AES__ 1 // CHECK_ICL_M64: #define __AVX2__ 1 +// CHECK_ICL_M64: #define __AVX512BITALG__ 1 // CHECK_ICL_M64: #define __AVX512BW__ 1 // CHECK_ICL_M64: #define __AVX512CD__ 1 // CHECK_ICL_M64: #define __AVX512DQ__ 1 Index: cfe/trunk/test/Preprocessor/x86_target_features.c =================================================================== --- cfe/trunk/test/Preprocessor/x86_target_features.c +++ cfe/trunk/test/Preprocessor/x86_target_features.c @@ -209,11 +209,33 @@ // AVX512VBMI: #define __SSE__ 1 // AVX512VBMI: #define __SSSE3__ 1 +// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bitalg -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BITALG %s + +// AVX512BITALG: #define __AVX2__ 1 +// AVX512BITALG: #define __AVX512BITALG__ 1 +// AVX512BITALG: #define __AVX512BW__ 1 +// AVX512BITALG: #define __AVX512F__ 1 +// AVX512BITALG: #define __AVX__ 1 +// AVX512BITALG: #define __SSE2_MATH__ 1 +// AVX512BITALG: #define __SSE2__ 1 +// AVX512BITALG: #define __SSE3__ 1 +// AVX512BITALG: #define __SSE4_1__ 1 +// AVX512BITALG: #define __SSE4_2__ 1 +// AVX512BITALG: #define __SSE_MATH__ 1 +// AVX512BITALG: #define __SSE__ 1 +// AVX512BITALG: #define __SSSE3__ 1 + + // RUN: %clang -target i386-unknown-unknown -march=atom -mavx512vbmi -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512VBMINOAVX512BW %s // AVX512VBMINOAVX512BW-NOT: #define __AVX512BW__ 1 // AVX512VBMINOAVX512BW-NOT: #define __AVX512VBMI__ 1 +// RUN: %clang -target i386-unknown-unknown -march=atom -mavx512bitalg -mno-avx512bw -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=AVX512BITALGNOAVX512BW %s + +// AVX512BITALGNOAVX512BW-NOT: #define __AVX512BITALG__ 1 +// AVX512BITALGNOAVX512BW-NOT: #define __AVX512BW__ 1 + // RUN: %clang -target i386-unknown-unknown -march=atom -msse4.2 -x c -E -dM -o - %s | FileCheck -match-full-lines --check-prefix=SSE42POPCNT %s // SSE42POPCNT: #define __POPCNT__ 1