Index: cfe/trunk/include/clang/Basic/BuiltinsX86.def =================================================================== --- cfe/trunk/include/clang/Basic/BuiltinsX86.def +++ cfe/trunk/include/clang/Basic/BuiltinsX86.def @@ -1107,6 +1107,9 @@ TARGET_BUILTIN(__builtin_ia32_vplzcntd_512_mask, "V16iV16iV16iUs", "", "avx512cd") TARGET_BUILTIN(__builtin_ia32_vplzcntq_512_mask, "V8LLiV8LLiV8LLiUc", "", "avx512cd") +TARGET_BUILTIN(__builtin_ia32_vpopcntd_512, "V16iV16i", "", "avx512vpopcntdq") +TARGET_BUILTIN(__builtin_ia32_vpopcntq_512, "V8LLiV8LLi", "", "avx512vpopcntdq") + TARGET_BUILTIN(__builtin_ia32_vpermi2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_vpermi2varhi256_mask, "V16sV16sV16sV16sUs", "", "avx512vl,avx512bw") TARGET_BUILTIN(__builtin_ia32_vpermt2varhi128_mask, "V8sV8sV8sV8sUc", "", "avx512vl,avx512bw") Index: cfe/trunk/include/clang/Driver/Options.td =================================================================== --- cfe/trunk/include/clang/Driver/Options.td +++ cfe/trunk/include/clang/Driver/Options.td @@ -1741,6 +1741,7 @@ def mno_avx2 : Flag<["-"], "mno-avx2">, Group; def mno_avx512f : Flag<["-"], "mno-avx512f">, Group; def mno_avx512cd : Flag<["-"], "mno-avx512cd">, Group; +def mno_avx512vpopcntdq : Flag<["-"], "mno-avx512vpopcntdq">, Group; def mno_avx512er : Flag<["-"], "mno-avx512er">, Group; def mno_avx512pf : Flag<["-"], "mno-avx512pf">, Group; def mno_avx512dq : Flag<["-"], "mno-avx512dq">, Group; @@ -1941,6 +1942,7 @@ def mavx2 : Flag<["-"], "mavx2">, Group; def mavx512f : Flag<["-"], "mavx512f">, Group; def mavx512cd : Flag<["-"], "mavx512cd">, Group; +def mavx512vpopcntdq : Flag<["-"], "mavx512vpopcntdq">, Group; def mavx512er : Flag<["-"], "mavx512er">, Group; def mavx512pf : Flag<["-"], "mavx512pf">, Group; def mavx512dq : Flag<["-"], "mavx512dq">, Group; Index: cfe/trunk/lib/Basic/Targets.cpp =================================================================== --- cfe/trunk/lib/Basic/Targets.cpp +++ cfe/trunk/lib/Basic/Targets.cpp @@ -2624,6 +2624,7 @@ bool HasFMA = false; bool HasF16C = false; bool HasAVX512CD = false; + bool HasAVX512VPOPCNTDQ = false; bool HasAVX512ER = false; bool HasAVX512PF = false; bool HasAVX512DQ = false; @@ -3504,9 +3505,9 @@ LLVM_FALLTHROUGH; case AVX512F: Features["avx512f"] = Features["avx512cd"] = Features["avx512er"] = - Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] = - Features["avx512vl"] = Features["avx512vbmi"] = - Features["avx512ifma"] = false; + Features["avx512pf"] = Features["avx512dq"] = Features["avx512bw"] = + Features["avx512vl"] = Features["avx512vbmi"] = + Features["avx512ifma"] = Features["avx512vpopcntdq"] = false; } } @@ -3616,7 +3617,8 @@ setSSELevel(Features, AVX512F, Enabled); } else if (Name == "avx512cd" || Name == "avx512er" || Name == "avx512pf" || Name == "avx512dq" || Name == "avx512bw" || Name == "avx512vl" || - Name == "avx512vbmi" || Name == "avx512ifma") { + Name == "avx512vbmi" || Name == "avx512ifma" || + Name == "avx512vpopcntdq") { if (Enabled) setSSELevel(Features, AVX512F, Enabled); // Enable BWI instruction if VBMI is being enabled. @@ -3700,6 +3702,8 @@ HasF16C = true; } else if (Feature == "+avx512cd") { HasAVX512CD = true; + } else if (Feature == "+avx512vpopcntdq") { + HasAVX512VPOPCNTDQ = true; } else if (Feature == "+avx512er") { HasAVX512ER = true; } else if (Feature == "+avx512pf") { @@ -4037,6 +4041,8 @@ if (HasAVX512CD) Builder.defineMacro("__AVX512CD__"); + if (HasAVX512VPOPCNTDQ) + Builder.defineMacro("__AVX512VPOPCNTDQ__"); if (HasAVX512ER) Builder.defineMacro("__AVX512ER__"); if (HasAVX512PF) @@ -4168,6 +4174,7 @@ .Case("avx2", SSELevel >= AVX2) .Case("avx512f", SSELevel >= AVX512F) .Case("avx512cd", HasAVX512CD) + .Case("avx512vpopcntdq", HasAVX512VPOPCNTDQ) .Case("avx512er", HasAVX512ER) .Case("avx512pf", HasAVX512PF) .Case("avx512dq", HasAVX512DQ) @@ -4253,6 +4260,7 @@ .Case("avx512bw", true) .Case("avx512dq", true) .Case("avx512cd", true) + .Case("avx512vpopcntdq", true) .Case("avx512er", true) .Case("avx512pf", true) .Case("avx512vbmi", true) Index: cfe/trunk/lib/CodeGen/CGBuiltin.cpp =================================================================== --- cfe/trunk/lib/CodeGen/CGBuiltin.cpp +++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp @@ -7332,39 +7332,42 @@ AVX512PF, AVX512VBMI, AVX512IFMA, + AVX512VPOPCNTDQ, MAX }; - X86Features Feature = StringSwitch(FeatureStr) - .Case("cmov", X86Features::CMOV) - .Case("mmx", X86Features::MMX) - .Case("popcnt", X86Features::POPCNT) - .Case("sse", X86Features::SSE) - .Case("sse2", X86Features::SSE2) - .Case("sse3", X86Features::SSE3) - .Case("ssse3", X86Features::SSSE3) - .Case("sse4.1", X86Features::SSE4_1) - .Case("sse4.2", X86Features::SSE4_2) - .Case("avx", X86Features::AVX) - .Case("avx2", X86Features::AVX2) - .Case("sse4a", X86Features::SSE4_A) - .Case("fma4", X86Features::FMA4) - .Case("xop", X86Features::XOP) - .Case("fma", X86Features::FMA) - .Case("avx512f", X86Features::AVX512F) - .Case("bmi", X86Features::BMI) - .Case("bmi2", X86Features::BMI2) - .Case("aes", X86Features::AES) - .Case("pclmul", X86Features::PCLMUL) - .Case("avx512vl", X86Features::AVX512VL) - .Case("avx512bw", X86Features::AVX512BW) - .Case("avx512dq", X86Features::AVX512DQ) - .Case("avx512cd", X86Features::AVX512CD) - .Case("avx512er", X86Features::AVX512ER) - .Case("avx512pf", X86Features::AVX512PF) - .Case("avx512vbmi", X86Features::AVX512VBMI) - .Case("avx512ifma", X86Features::AVX512IFMA) - .Default(X86Features::MAX); + X86Features Feature = + StringSwitch(FeatureStr) + .Case("cmov", X86Features::CMOV) + .Case("mmx", X86Features::MMX) + .Case("popcnt", X86Features::POPCNT) + .Case("sse", X86Features::SSE) + .Case("sse2", X86Features::SSE2) + .Case("sse3", X86Features::SSE3) + .Case("ssse3", X86Features::SSSE3) + .Case("sse4.1", X86Features::SSE4_1) + .Case("sse4.2", X86Features::SSE4_2) + .Case("avx", X86Features::AVX) + .Case("avx2", X86Features::AVX2) + .Case("sse4a", X86Features::SSE4_A) + .Case("fma4", X86Features::FMA4) + .Case("xop", X86Features::XOP) + .Case("fma", X86Features::FMA) + .Case("avx512f", X86Features::AVX512F) + .Case("bmi", X86Features::BMI) + .Case("bmi2", X86Features::BMI2) + .Case("aes", X86Features::AES) + .Case("pclmul", X86Features::PCLMUL) + .Case("avx512vl", X86Features::AVX512VL) + .Case("avx512bw", X86Features::AVX512BW) + .Case("avx512dq", X86Features::AVX512DQ) + .Case("avx512cd", X86Features::AVX512CD) + .Case("avx512er", X86Features::AVX512ER) + .Case("avx512pf", X86Features::AVX512PF) + .Case("avx512vbmi", X86Features::AVX512VBMI) + .Case("avx512ifma", X86Features::AVX512IFMA) + .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ) + .Default(X86Features::MAX); assert(Feature != X86Features::MAX && "Invalid feature!"); // Matching the struct layout from the compiler-rt/libgcc structure that is @@ -7517,7 +7520,12 @@ case X86::BI__builtin_ia32_storesd128_mask: { return EmitX86MaskedStore(*this, Ops, 16); } - + case X86::BI__builtin_ia32_vpopcntd_512: + case X86::BI__builtin_ia32_vpopcntq_512: { + llvm::Type *ResultType = ConvertType(E->getType()); + llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType); + return Builder.CreateCall(F, Ops); + } case X86::BI__builtin_ia32_cvtmask2b128: case X86::BI__builtin_ia32_cvtmask2b256: case X86::BI__builtin_ia32_cvtmask2b512: Index: cfe/trunk/lib/Headers/CMakeLists.txt =================================================================== --- cfe/trunk/lib/Headers/CMakeLists.txt +++ cfe/trunk/lib/Headers/CMakeLists.txt @@ -7,6 +7,7 @@ avx2intrin.h avx512bwintrin.h avx512cdintrin.h + avx512vpopcntdqintrin.h avx512dqintrin.h avx512erintrin.h avx512fintrin.h Index: cfe/trunk/lib/Headers/avx512vpopcntdqintrin.h =================================================================== --- cfe/trunk/lib/Headers/avx512vpopcntdqintrin.h +++ cfe/trunk/lib/Headers/avx512vpopcntdqintrin.h @@ -0,0 +1,70 @@ +/*===------------- avx512vpopcntdqintrin.h - AVX512VPOPCNTDQ intrinsics + *------------------=== + * + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + * + *===-----------------------------------------------------------------------=== + */ +#ifndef __IMMINTRIN_H +#error \ + "Never use directly; include instead." +#endif + +#ifndef __AVX512VPOPCNTDQINTRIN_H +#define __AVX512VPOPCNTDQINTRIN_H + +/* Define the default attributes for the functions in this file. */ +#define __DEFAULT_FN_ATTRS \ + __attribute__((__always_inline__, __nodebug__, __target__("avx512vpopcntd" \ + "q"))) + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi64(__m512i __A) { + return (__m512i)__builtin_ia32_vpopcntq_512((__v8di)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectq_512( + (__mmask8)__U, (__v8di)_mm512_popcnt_epi64(__A), (__v8di)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { + return _mm512_mask_popcnt_epi64((__m512i)_mm512_setzero_si512(), __U, __A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS _mm512_popcnt_epi32(__m512i __A) { + return (__m512i)__builtin_ia32_vpopcntd_512((__v16si)__A); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { + return (__m512i)__builtin_ia32_selectd_512( + (__mmask16)__U, (__v16si)_mm512_popcnt_epi32(__A), (__v16si)__W); +} + +static __inline__ __m512i __DEFAULT_FN_ATTRS +_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) { + return _mm512_mask_popcnt_epi32((__m512i)_mm512_setzero_si512(), __U, __A); +} + +#undef __DEFAULT_FN_ATTRS + +#endif Index: cfe/trunk/lib/Headers/immintrin.h =================================================================== --- cfe/trunk/lib/Headers/immintrin.h +++ cfe/trunk/lib/Headers/immintrin.h @@ -146,6 +146,10 @@ #include #endif +#if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512VPOPCNTDQ__) +#include +#endif + #if !defined(_MSC_VER) || __has_feature(modules) || defined(__AVX512DQ__) #include #endif Index: cfe/trunk/test/CodeGen/attr-target-x86.c =================================================================== --- cfe/trunk/test/CodeGen/attr-target-x86.c +++ cfe/trunk/test/CodeGen/attr-target-x86.c @@ -36,7 +36,7 @@ // CHECK: lake{{.*}} #6 // CHECK: #0 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" // CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+aes,+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" -// CHECK: #2 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt" +// CHECK: #2 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+x87,-aes,-avx,-avx2,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vl,-avx512vpopcntdq,-f16c,-fma,-fma4,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-xop,-xsave,-xsaveopt" // CHECK: #3 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" // CHECK: #4 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cx16,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes" // CHECK: #5 = {{.*}}"target-cpu"="x86-64" "target-features"="+fxsr,+sse,+sse2,+x87,-3dnow,-3dnowa,-mmx" Index: cfe/trunk/test/CodeGen/avx512vpopcntdqintrin.c =================================================================== --- cfe/trunk/test/CodeGen/avx512vpopcntdqintrin.c +++ cfe/trunk/test/CodeGen/avx512vpopcntdqintrin.c @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -ffreestanding %s -triple=x86_64-apple-darwin -target-feature +avx512vpopcntdq -emit-llvm -o - -Wall -Werror | FileCheck %s + +#include + +__m512i test_mm512_popcnt_epi64(__m512i __A) { + // CHECK-LABEL: @test_mm512_popcnt_epi64 + // CHECK: @llvm.ctpop.v8i64 + return _mm512_popcnt_epi64(__A); +} +__m512i test_mm512_mask_popcnt_epi64(__m512i __W, __mmask8 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_popcnt_epi64 + // CHECK: @llvm.ctpop.v8i64 + // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{[0-9]+}}, <8 x i64> {{.*}} + return _mm512_mask_popcnt_epi64(__W, __U, __A); +} +__m512i test_mm512_maskz_popcnt_epi64(__mmask8 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_popcnt_epi64 + // CHECK: @llvm.ctpop.v8i64 + // CHECK: select <8 x i1> %{{[0-9]+}}, <8 x i64> %{{[0-9]+}}, <8 x i64> {{.*}} + return _mm512_maskz_popcnt_epi64(__U, __A); +} +__m512i test_mm512_popcnt_epi32(__m512i __A) { + // CHECK-LABEL: @test_mm512_popcnt_epi32 + // CHECK: @llvm.ctpop.v16i32 + return _mm512_popcnt_epi32(__A); +} +__m512i test_mm512_mask_popcnt_epi32(__m512i __W, __mmask16 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_mask_popcnt_epi32 + // CHECK: @llvm.ctpop.v16i32 + // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <16 x i32> {{.*}} + return _mm512_mask_popcnt_epi32(__W, __U, __A); +} +__m512i test_mm512_maskz_popcnt_epi32(__mmask16 __U, __m512i __A) { + // CHECK-LABEL: @test_mm512_maskz_popcnt_epi32 + // CHECK: @llvm.ctpop.v16i32 + // CHECK: select <16 x i1> %{{[0-9]+}}, <16 x i32> %{{[0-9]+}}, <16 x i32> {{.*}} + return _mm512_maskz_popcnt_epi32(__U, __A); +}