Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -5388,6 +5388,21 @@ [IntrNoMem]>; } +// AVX-512 VPOPCNTDQ: masked 512-bit per-element population count (VPOPCNTD/Q). +let TargetPrefix = "x86" in { + def int_x86_avx512_mask_popcnt_d_512 : + GCCBuiltin<"__builtin_ia32_vpopcntd_512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_popcnt_q_512 : + GCCBuiltin<"__builtin_ia32_vpopcntq_512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; +} + // Compares let TargetPrefix = "x86" in { // 512-bit Index: lib/Support/Host.cpp =================================================================== --- lib/Support/Host.cpp +++ lib/Support/Host.cpp @@ -1397,6 +1397,8 @@ Features["avx512cd"] = HasLeaf7 && ((EBX >> 28) & 1) && HasAVX512Save; Features["avx512bw"] = HasLeaf7 && ((EBX >> 30) & 1) && HasAVX512Save; Features["avx512vl"] = HasLeaf7 && ((EBX >> 31) & 1) && HasAVX512Save; + // AVX512_VPOPCNTDQ is CPUID.(EAX=7,ECX=0):ECX[14]; EBX[14] reports MPX. + Features["avx512_vpopcntdq"] = HasLeaf7 && ((ECX >> 14) & 1) && HasAVX512Save; Features["prefetchwt1"] = HasLeaf7 && (ECX & 1); Features["avx512vbmi"] = HasLeaf7 && ((ECX >> 1) & 1) && HasAVX512Save; Index: lib/Target/X86/X86.td =================================================================== --- lib/Target/X86/X86.td +++ lib/Target/X86/X86.td @@ -127,6 +127,9 @@ def FeatureCDI : SubtargetFeature<"avx512cd", "HasCDI", "true", "Enable AVX-512 Conflict Detection Instructions", [FeatureAVX512]>; +def FeatureVPOPCNTDQ : SubtargetFeature<"avx512_vpopcntdq", "HasVPOPCNTDQ", + "true", "Enable AVX-512 Population Count Instructions", + [FeatureAVX512]>; def FeaturePFI : SubtargetFeature<"avx512pf", "HasPFI", "true", "Enable AVX-512 PreFetch Instructions", [FeatureAVX512]>; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- 
lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -1388,6 +1388,12 @@ setOperationAction(ISD::MUL, MVT::v8i64, Legal); } + if (Subtarget.hasVPOPCNTDQ()) { + // VPOPCNTDQ sub-targets select CTPOP on 512-bit vectors directly. + setOperationAction(ISD::CTPOP, MVT::v8i64, Legal); + setOperationAction(ISD::CTPOP, MVT::v16i32, Legal); + } + // Custom lower several nodes. for (auto VT : { MVT::v4i32, MVT::v8i32, MVT::v2i64, MVT::v4i64, MVT::v4f32, MVT::v8f32, MVT::v2f64, MVT::v4f64 }) { Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -8640,6 +8640,22 @@ defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>; //===---------------------------------------------------------------------===// +// Per-element population count (512-bit only) - VPOPCNTD and VPOPCNTQ +//===---------------------------------------------------------------------===// + +multiclass avx512_unary_rmb_popcnt<bits<8> opc, string OpcodeStr, X86VectorVTInfo VTInfo> { + let Predicates = [HasVPOPCNTDQ] in + defm Z : avx512_unary_rmb<opc, OpcodeStr, ctpop, VTInfo>, EVEX_V512; +} + +multiclass avx512_unary_rmb_popcnt_dq<bits<8> opc, string OpcodeStr> { + defm Q : avx512_unary_rmb_popcnt<opc, OpcodeStr#"q", v8i64_info>, VEX_W; + defm D : avx512_unary_rmb_popcnt<opc, OpcodeStr#"d", v16i32_info>; +} + +defm VPOPCNT : avx512_unary_rmb_popcnt_dq<0x55, "vpopcnt">; + +//===---------------------------------------------------------------------===// // Replicate Single FP - MOVSHDUP and MOVSLDUP //===---------------------------------------------------------------------===// multiclass avx512_replicate<bits<8> opc, string OpcodeStr, SDNode OpNode>{ Index: lib/Target/X86/X86InstrInfo.cpp =================================================================== --- lib/Target/X86/X86InstrInfo.cpp +++ lib/Target/X86/X86InstrInfo.cpp @@ -871,6 +871,8 @@ { X86::TZMSK64rr, X86::TZMSK64rm, 0 }, // AVX-512 foldable instructions + { X86::VPOPCNTDZrr, X86::VPOPCNTDZrm, TB_NO_REVERSE }, + { 
X86::VPOPCNTQZrr, X86::VPOPCNTQZrm, TB_NO_REVERSE }, { X86::VBROADCASTSSZr, X86::VBROADCASTSSZm, TB_NO_REVERSE }, { X86::VBROADCASTSDZr, X86::VBROADCASTSDZm, TB_NO_REVERSE }, { X86::VMOV64toPQIZrr, X86::VMOVQI2PQIZrm, 0 }, @@ -2298,6 +2300,8 @@ { X86::VXORPSZ256rr, X86::VXORPSZ256rm, 0 }, // AVX-512 masked foldable instructions + { X86::VPOPCNTDZrrkz, X86::VPOPCNTDZrmkz, TB_NO_REVERSE }, + { X86::VPOPCNTQZrrkz, X86::VPOPCNTQZrmkz, TB_NO_REVERSE }, { X86::VBROADCASTSSZrkz, X86::VBROADCASTSSZmkz, TB_NO_REVERSE }, { X86::VBROADCASTSDZrkz, X86::VBROADCASTSDZmkz, TB_NO_REVERSE }, { X86::VPABSBZrrkz, X86::VPABSBZrmkz, 0 }, @@ -2919,6 +2923,8 @@ { X86::VXORPSZ128rrkz, X86::VXORPSZ128rmkz, 0 }, // AVX-512 masked foldable instructions + { X86::VPOPCNTDZrrk, X86::VPOPCNTDZrmk, TB_NO_REVERSE }, + { X86::VPOPCNTQZrrk, X86::VPOPCNTQZrmk, TB_NO_REVERSE }, { X86::VBROADCASTSSZrk, X86::VBROADCASTSSZmk, TB_NO_REVERSE }, { X86::VBROADCASTSDZrk, X86::VBROADCASTSDZmk, TB_NO_REVERSE }, { X86::VPABSBZrrk, X86::VPABSBZrmk, 0 }, 
Index: lib/Target/X86/X86InstrInfo.td =================================================================== --- lib/Target/X86/X86InstrInfo.td +++ lib/Target/X86/X86InstrInfo.td @@ -807,6 +807,9 @@ def NoAVX512 : Predicate<"!Subtarget->hasAVX512()">; def HasCDI : Predicate<"Subtarget->hasCDI()">, AssemblerPredicate<"FeatureCDI", "AVX-512 CD ISA">; +def HasVPOPCNTDQ : Predicate<"Subtarget->hasVPOPCNTDQ()">, + AssemblerPredicate<"FeatureVPOPCNTDQ", "AVX-512 VPOPCNTDQ ISA">; +def NoVPOPCNTDQ : Predicate<"!Subtarget->hasVPOPCNTDQ()">; def HasPFI : Predicate<"Subtarget->hasPFI()">, AssemblerPredicate<"FeaturePFI", "AVX-512 PF ISA">; def HasERI : Predicate<"Subtarget->hasERI()">, Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -1045,6 +1045,10 @@ X86ISD::MULTISHIFT, 0), X86_INTRINSIC_DATA(avx512_mask_pmultishift_qb_512, INTR_TYPE_2OP_MASK, X86ISD::MULTISHIFT, 0), + X86_INTRINSIC_DATA(avx512_mask_popcnt_d_512, INTR_TYPE_1OP_MASK, + ISD::CTPOP, 0), + X86_INTRINSIC_DATA(avx512_mask_popcnt_q_512, INTR_TYPE_1OP_MASK, + ISD::CTPOP, 0), X86_INTRINSIC_DATA(avx512_mask_prol_d_128, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VROTLI, 0), X86_INTRINSIC_DATA(avx512_mask_prol_d_256, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VROTLI, 0), X86_INTRINSIC_DATA(avx512_mask_prol_d_512, INTR_TYPE_2OP_IMM8_MASK, X86ISD::VROTLI, 0), Index: lib/Target/X86/X86Subtarget.h =================================================================== --- lib/Target/X86/X86Subtarget.h +++ lib/Target/X86/X86Subtarget.h @@ -262,6 +262,9 @@ /// Processor has AVX-512 Conflict Detection Instructions bool HasCDI; + /// Processor has AVX-512 Population Count (VPOPCNTD/VPOPCNTQ) instructions + bool HasVPOPCNTDQ; + /// Processor has AVX-512 Doubleword 
and Quadword instructions bool HasDQI; @@ -488,6 +491,7 @@ bool slowLEA() const { return SlowLEA; } bool slowIncDec() const { return SlowIncDec; } bool hasCDI() const { return HasCDI; } + bool hasVPOPCNTDQ() const { return HasVPOPCNTDQ; } bool hasPFI() const { return HasPFI; } bool hasERI() const { return HasERI; } bool hasDQI() const { return HasDQI; } Index: lib/Target/X86/X86Subtarget.cpp =================================================================== --- lib/Target/X86/X86Subtarget.cpp +++ lib/Target/X86/X86Subtarget.cpp @@ -279,6 +279,7 @@ HasCDI = false; HasPFI = false; HasDQI = false; + HasVPOPCNTDQ = false; HasBWI = false; HasVLX = false; HasADX = false; Index: test/CodeGen/X86/avx512_vpopcntdq-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512_vpopcntdq-intrinsics.ll +++ test/CodeGen/X86/avx512_vpopcntdq-intrinsics.ll @@ -0,0 +1,45 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512_vpopcntdq | FileCheck %s + +define <16 x i32> @test_vpopcnt_d(<16 x i32> %a) { +; CHECK-LABEL: test_vpopcnt_d: +; CHECK: ## BB#0: +; CHECK-NEXT: vpopcntd %zmm0, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.mask.popcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1) + ret <16 x i32> %res +} + +declare <16 x i32> @llvm.x86.avx512.mask.popcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly + +define <8 x i64> @test_vpopcnt_q(<8 x i64> %a) { +; CHECK-LABEL: test_vpopcnt_q: +; CHECK: ## BB#0: +; CHECK-NEXT: vpopcntq %zmm0, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x i64> @llvm.x86.avx512.mask.popcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) + ret <8 x i64> %res +} + +declare <8 x i64> @llvm.x86.avx512.mask.popcnt.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly + +define <16 x i32> @test_maskz_vpopcnt_d(<16 x i32> %a, i16 %mask) { +; CHECK-LABEL: test_maskz_vpopcnt_d: +; CHECK: ## 
BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpopcntd %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.mask.popcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask) + ret <16 x i32> %res +} + +define <8 x i64> @test_mask_vpopcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { +; CHECK-LABEL: test_mask_vpopcnt_q: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpopcntq %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x i64> @llvm.x86.avx512.mask.popcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) + ret <8 x i64> %res +} Index: test/MC/X86/x86-64-avx512_vpopcntdq.s =================================================================== --- test/MC/X86/x86-64-avx512_vpopcntdq.s +++ test/MC/X86/x86-64-avx512_vpopcntdq.s @@ -0,0 +1,225 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -mattr=+avx512_vpopcntdq --show-encoding %s | FileCheck %s + +// CHECK: vpopcntq %zmm25, %zmm20 +// CHECK: encoding: [0x62,0x82,0xfd,0x48,0x55,0xe1] + vpopcntq %zmm25, %zmm20 + +// CHECK: vpopcntq %zmm25, %zmm20 {%k6} +// CHECK: encoding: [0x62,0x82,0xfd,0x4e,0x55,0xe1] + vpopcntq %zmm25, %zmm20 {%k6} + +// CHECK: vpopcntq %zmm25, %zmm20 {%k6} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0xce,0x55,0xe1] + vpopcntq %zmm25, %zmm20 {%k6} {z} + +// CHECK: vpopcntq (%rcx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x21] + vpopcntq (%rcx), %zmm20 + +// CHECK: vpopcntq 291(%rax,%r14,8), %zmm20 +// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x55,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpopcntq 291(%rax,%r14,8), %zmm20 + +// CHECK: vpopcntq (%rcx){1to8}, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x21] + vpopcntq (%rcx){1to8}, %zmm20 + +// CHECK: vpopcntq 4064(%rdx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0xa2,0xe0,0x0f,0x00,0x00] + vpopcntq 4064(%rdx), %zmm20 + +// CHECK: vpopcntq 4096(%rdx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x62,0x40] + vpopcntq 
4096(%rdx), %zmm20 + +// CHECK: vpopcntq -4096(%rdx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x62,0xc0] + vpopcntq -4096(%rdx), %zmm20 + +// CHECK: vpopcntq -4128(%rdx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0xa2,0xe0,0xef,0xff,0xff] + vpopcntq -4128(%rdx), %zmm20 + +// CHECK: vpopcntq 1016(%rdx){1to8}, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x62,0x7f] + vpopcntq 1016(%rdx){1to8}, %zmm20 + +// CHECK: vpopcntq 1024(%rdx){1to8}, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0xa2,0x00,0x04,0x00,0x00] + vpopcntq 1024(%rdx){1to8}, %zmm20 + +// CHECK: vpopcntq -1024(%rdx){1to8}, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x62,0x80] + vpopcntq -1024(%rdx){1to8}, %zmm20 + +// CHECK: vpopcntq -1032(%rdx){1to8}, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0xa2,0xf8,0xfb,0xff,0xff] + vpopcntq -1032(%rdx){1to8}, %zmm20 + +// CHECK: vpopcntq %zmm21, %zmm17 +// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x55,0xcd] + vpopcntq %zmm21, %zmm17 + +// CHECK: vpopcntq %zmm21, %zmm17 {%k6} +// CHECK: encoding: [0x62,0xa2,0xfd,0x4e,0x55,0xcd] + vpopcntq %zmm21, %zmm17 {%k6} + +// CHECK: vpopcntq %zmm21, %zmm17 {%k6} {z} +// CHECK: encoding: [0x62,0xa2,0xfd,0xce,0x55,0xcd] + vpopcntq %zmm21, %zmm17 {%k6} {z} + +// CHECK: vpopcntq (%rcx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x09] + vpopcntq (%rcx), %zmm17 + +// CHECK: vpopcntq 4660(%rax,%r14,8), %zmm17 +// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x55,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpopcntq 4660(%rax,%r14,8), %zmm17 + +// CHECK: vpopcntq (%rcx){1to8}, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x09] + vpopcntq (%rcx){1to8}, %zmm17 + +// CHECK: vpopcntq 4064(%rdx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x8a,0xe0,0x0f,0x00,0x00] + vpopcntq 4064(%rdx), %zmm17 + +// CHECK: vpopcntq 4096(%rdx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x4a,0x40] + vpopcntq 4096(%rdx), %zmm17 + +// CHECK: vpopcntq -4096(%rdx), %zmm17 +// 
CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x4a,0xc0] + vpopcntq -4096(%rdx), %zmm17 + +// CHECK: vpopcntq -4128(%rdx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x55,0x8a,0xe0,0xef,0xff,0xff] + vpopcntq -4128(%rdx), %zmm17 + +// CHECK: vpopcntq 1016(%rdx){1to8}, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x4a,0x7f] + vpopcntq 1016(%rdx){1to8}, %zmm17 + +// CHECK: vpopcntq 1024(%rdx){1to8}, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x8a,0x00,0x04,0x00,0x00] + vpopcntq 1024(%rdx){1to8}, %zmm17 + +// CHECK: vpopcntq -1024(%rdx){1to8}, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x4a,0x80] + vpopcntq -1024(%rdx){1to8}, %zmm17 + +// CHECK: vpopcntq -1032(%rdx){1to8}, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x55,0x8a,0xf8,0xfb,0xff,0xff] + vpopcntq -1032(%rdx){1to8}, %zmm17 + +// CHECK: vpopcntd %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x55,0xcb] + vpopcntd %zmm19, %zmm25 + +// CHECK: vpopcntd %zmm19, %zmm25 {%k4} +// CHECK: encoding: [0x62,0x22,0x7d,0x4c,0x55,0xcb] + vpopcntd %zmm19, %zmm25 {%k4} + +// CHECK: vpopcntd %zmm19, %zmm25 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xcc,0x55,0xcb] + vpopcntd %zmm19, %zmm25 {%k4} {z} + +// CHECK: vpopcntd (%rcx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x09] + vpopcntd (%rcx), %zmm25 + +// CHECK: vpopcntd 291(%rax,%r14,8), %zmm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x55,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpopcntd 291(%rax,%r14,8), %zmm25 + +// CHECK: vpopcntd (%rcx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x09] + vpopcntd (%rcx){1to16}, %zmm25 + +// CHECK: vpopcntd 4064(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x8a,0xe0,0x0f,0x00,0x00] + vpopcntd 4064(%rdx), %zmm25 + +// CHECK: vpopcntd 4096(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x4a,0x40] + vpopcntd 4096(%rdx), %zmm25 + +// CHECK: vpopcntd -4096(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x4a,0xc0] + vpopcntd 
-4096(%rdx), %zmm25 + +// CHECK: vpopcntd -4128(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x8a,0xe0,0xef,0xff,0xff] + vpopcntd -4128(%rdx), %zmm25 + +// CHECK: vpopcntd 508(%rdx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x4a,0x7f] + vpopcntd 508(%rdx){1to16}, %zmm25 + +// CHECK: vpopcntd 512(%rdx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x8a,0x00,0x02,0x00,0x00] + vpopcntd 512(%rdx){1to16}, %zmm25 + +// CHECK: vpopcntd -512(%rdx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x4a,0x80] + vpopcntd -512(%rdx){1to16}, %zmm25 + +// CHECK: vpopcntd -516(%rdx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x8a,0xfc,0xfd,0xff,0xff] + vpopcntd -516(%rdx){1to16}, %zmm25 + +// CHECK: vpopcntd %zmm21, %zmm26 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x55,0xd5] + vpopcntd %zmm21, %zmm26 + +// CHECK: vpopcntd %zmm21, %zmm26 {%k4} +// CHECK: encoding: [0x62,0x22,0x7d,0x4c,0x55,0xd5] + vpopcntd %zmm21, %zmm26 {%k4} + +// CHECK: vpopcntd %zmm21, %zmm26 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xcc,0x55,0xd5] + vpopcntd %zmm21, %zmm26 {%k4} {z} + +// CHECK: vpopcntd (%rcx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x11] + vpopcntd (%rcx), %zmm26 + +// CHECK: vpopcntd 4660(%rax,%r14,8), %zmm26 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x55,0x94,0xf0,0x34,0x12,0x00,0x00] + vpopcntd 4660(%rax,%r14,8), %zmm26 + +// CHECK: vpopcntd (%rcx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x11] + vpopcntd (%rcx){1to16}, %zmm26 + +// CHECK: vpopcntd 4064(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x92,0xe0,0x0f,0x00,0x00] + vpopcntd 4064(%rdx), %zmm26 + +// CHECK: vpopcntd 4096(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x52,0x40] + vpopcntd 4096(%rdx), %zmm26 + +// CHECK: vpopcntd -4096(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x52,0xc0] + vpopcntd -4096(%rdx), %zmm26 + +// CHECK: vpopcntd -4128(%rdx), %zmm26 
+// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x55,0x92,0xe0,0xef,0xff,0xff] + vpopcntd -4128(%rdx), %zmm26 + +// CHECK: vpopcntd 508(%rdx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x52,0x7f] + vpopcntd 508(%rdx){1to16}, %zmm26 + +// CHECK: vpopcntd 512(%rdx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x92,0x00,0x02,0x00,0x00] + vpopcntd 512(%rdx){1to16}, %zmm26 + +// CHECK: vpopcntd -512(%rdx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x52,0x80] + vpopcntd -512(%rdx){1to16}, %zmm26 + +// CHECK: vpopcntd -516(%rdx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x55,0x92,0xfc,0xfd,0xff,0xff] + vpopcntd -516(%rdx){1to16}, %zmm26