Index: include/llvm/IR/IntrinsicsX86.td
===================================================================
--- include/llvm/IR/IntrinsicsX86.td
+++ include/llvm/IR/IntrinsicsX86.td
@@ -1609,6 +1609,25 @@
               [IntrArgMemOnly]>;
 }
 
+// BITALG bits shuffle
+let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.".
+  def int_x86_avx512_mask_vpshufbitqmb_128 :
+    GCCBuiltin<"__builtin_ia32_vpshufbitqmb128_mask">,
+    Intrinsic<[llvm_i16_ty],
+              [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty],
+              [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshufbitqmb_256 :
+    GCCBuiltin<"__builtin_ia32_vpshufbitqmb256_mask">,
+    Intrinsic<[llvm_i32_ty],
+              [llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty],
+              [IntrNoMem]>;
+  def int_x86_avx512_mask_vpshufbitqmb_512 :
+    GCCBuiltin<"__builtin_ia32_vpshufbitqmb512_mask">,
+    Intrinsic<[llvm_i64_ty],
+              [llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty],
+              [IntrNoMem]>;
+}
+
 //===----------------------------------------------------------------------===//
 // AVX2
 
Index: lib/Support/Host.cpp
===================================================================
--- lib/Support/Host.cpp
+++ lib/Support/Host.cpp
@@ -1476,6 +1476,9 @@
   // Enable protection keys
   Features["pku"] = HasLeaf7 && ((ECX >> 4) & 1);
 
+  // Enable Bit Algorithms
+  Features["avx512bitalg"] = HasLeaf7 && ((ECX >> 12) & 1) && HasAVX512Save;
+
   bool HasLeafD = MaxLevel >= 0xd &&
                   !getX86CpuIDAndInfoEx(0xd, 0x1, &EAX, &EBX, &ECX, &EDX);
 
Index: lib/Target/X86/X86.td
===================================================================
--- lib/Target/X86/X86.td
+++ lib/Target/X86/X86.td
@@ -151,6 +151,9 @@
                                       [FeatureAVX512]>;
 def FeaturePKU : SubtargetFeature<"pku", "HasPKU", "true",
                                   "Enable protection keys">;
+def FeatureBITALG : SubtargetFeature<"avx512bitalg", "HasBITALG", "true",
+                                     "Enable AVX-512 Bit Algorithms",
+                                     [FeatureBWI]>;
 def FeaturePCLMUL : SubtargetFeature<"pclmul", "HasPCLMUL", "true",
                          "Enable packed carry-less multiplication instructions",
                                      [FeatureSSE2]>;
Index: 
lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -498,6 +498,9 @@
       COMPRESS,
       EXPAND,
 
+      // Bits shuffle
+      VPSHUFBITQMB,
+
       // Convert Unsigned/Integer to Floating-Point Value with rounding mode.
       SINT_TO_FP_RND, UINT_TO_FP_RND, SCALAR_SINT_TO_FP_RND, SCALAR_UINT_TO_FP_RND,
 
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -1548,6 +1548,11 @@
     }
   }
 
+  if (Subtarget.hasBITALG())
+    for (auto VT : { MVT::v64i8, MVT::v32i16, MVT::v32i8,
+                     MVT::v16i16, MVT::v16i8, MVT::v8i16 })
+      setOperationAction(ISD::CTPOP, VT, Legal);
+
   // We want to custom lower some of our intrinsics.
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
@@ -24907,6 +24912,7 @@
   case X86ISD::CVTS2UI_RND: return "X86ISD::CVTS2UI_RND";
   case X86ISD::LWPINS: return "X86ISD::LWPINS";
   case X86ISD::MGATHER: return "X86ISD::MGATHER";
+  case X86ISD::VPSHUFBITQMB: return "X86ISD::VPSHUFBITQMB";
   }
   return nullptr;
 }
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -9804,3 +9804,45 @@
 defm : AVX512_scalar_math_f64_patterns<fsub, "SUB">;
 defm : AVX512_scalar_math_f64_patterns<fmul, "MUL">;
 defm : AVX512_scalar_math_f64_patterns<fdiv, "DIV">;
+
+//===----------------------------------------------------------------------===//
+// Bit Algorithms
+//===----------------------------------------------------------------------===//
+
+defm VPOPCNTB : avx512_unary_rm_vl<0x54, "vpopcntb", ctpop,
+                                   avx512vl_i8_info, HasBITALG>,
+                avx512_unary_lowering<ctpop, avx512vl_i8_info, HasBITALG>;
+defm VPOPCNTW : avx512_unary_rm_vl<0x54, "vpopcntw", ctpop,
+                                   avx512vl_i16_info, HasBITALG>,
+                avx512_unary_lowering<ctpop, avx512vl_i16_info, HasBITALG>, VEX_W;
+
+// Register (rr) and memory (rm) forms of VPSHUFBITQMB for vector type VTI;
+// both write their result to VTI.KRC:$dst.
+multiclass VPSHUFBITQMB_rm<X86VectorVTInfo VTI> {
+  defm rr : AVX512_maskable_cmp<0x8F, MRMSrcReg, VTI, (outs VTI.KRC:$dst),
+                                (ins VTI.RC:$src1, VTI.RC:$src2),
+                                "vpshufbitqmb",
+                                "$src2, $src1", "$src1, $src2",
+                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
+                                (VTI.VT VTI.RC:$src2))>, EVEX_4V, T8PD;
+  defm rm : AVX512_maskable_cmp<0x8F, MRMSrcMem, VTI, (outs VTI.KRC:$dst),
+                                (ins VTI.RC:$src1, VTI.MemOp:$src2),
+                                "vpshufbitqmb",
+                                "$src2, $src1", "$src1, $src2",
+                                (X86Vpshufbitqmb (VTI.VT VTI.RC:$src1),
+                                (VTI.VT (bitconvert (VTI.LdFrag addr:$src2))))>,
+                                EVEX_4V, EVEX_CD8<8, CD8VF>, T8PD;
+}
+
+// 512-bit form requires HasBITALG; 256/128-bit forms also require HasVLX.
+multiclass VPSHUFBITQMB_common<AVX512VLVectorVTInfo VTI> {
+  let Predicates = [HasBITALG] in
+  defm Z : VPSHUFBITQMB_rm<VTI.info512>, EVEX_V512;
+  let Predicates = [HasBITALG, HasVLX] in {
+    defm Z256 : VPSHUFBITQMB_rm<VTI.info256>, EVEX_V256;
+    defm Z128 : VPSHUFBITQMB_rm<VTI.info128>, EVEX_V128;
+  }
+}
+
+defm VPSHUFBITQMB : VPSHUFBITQMB_common<avx512vl_i8_info>;
+
Index: lib/Target/X86/X86InstrFragmentsSIMD.td
===================================================================
--- lib/Target/X86/X86InstrFragmentsSIMD.td
+++ lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -524,6 +524,13 @@
 def X86expand : SDNode<"X86ISD::EXPAND",
                  SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisVec<1>]>, []>;
 
+// vpshufbitqmb
+def X86Vpshufbitqmb : SDNode<"X86ISD::VPSHUFBITQMB",
+                             SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>,
+                                                  SDTCisSameAs<1,2>,
+                                                  SDTCVecEltisVT<0,i1>,
+                                                  SDTCisSameNumEltsAs<0,1>]>>;
+
 def SDTintToFPRound: SDTypeProfile<1, 3, [SDTCisVec<0>,
                                           SDTCisFP<0>, SDTCisSameAs<0,1>,
                                           SDTCisInt<2>, SDTCisVT<3, i32>]>;
Index: lib/Target/X86/X86InstrInfo.td
===================================================================
--- lib/Target/X86/X86InstrInfo.td
+++ lib/Target/X86/X86InstrInfo.td
@@ -823,6 +823,7 @@
 def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">;
 def PKU : Predicate<"Subtarget->hasPKU()">;
 
+def HasBITALG : Predicate<"Subtarget->hasBITALG()">;
 def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">;
 def HasAES : Predicate<"Subtarget->hasAES()">;
 def HasFXSR : Predicate<"Subtarget->hasFXSR()">;
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h
+++ lib/Target/X86/X86IntrinsicsInfo.h
@@ -1211,6 +1211,12 @@
                      X86ISD::VPMADD52L, 0),
   X86_INTRINSIC_DATA(avx512_mask_vpmadd52l_uq_512 , IFMA_OP_MASK,
                      X86ISD::VPMADD52L, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_128, CMP_MASK,
+                     X86ISD::VPSHUFBITQMB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_256, CMP_MASK,
+                     X86ISD::VPSHUFBITQMB, 0),
+  X86_INTRINSIC_DATA(avx512_mask_vpshufbitqmb_512, CMP_MASK,
+                     X86ISD::VPSHUFBITQMB, 0),
   X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, ISD::FMA, 0),
   X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, ISD::FMA, 0),
   X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, ISD::FMA,
Index: lib/Target/X86/X86Subtarget.h
===================================================================
--- lib/Target/X86/X86Subtarget.h
+++ lib/Target/X86/X86Subtarget.h
@@ -297,6 +297,9 @@
   /// Processor has PKU extenstions
   bool HasPKU;
 
+  /// Processor has AVX-512 Bit Algorithms instructions
+  bool HasBITALG;
+
   /// Processor supports MPX - Memory Protection Extensions
   bool HasMPX;
 
@@ -520,6 +523,7 @@
   bool hasBWI() const { return HasBWI; }
   bool hasVLX() const { return HasVLX; }
   bool hasPKU() const { return HasPKU; }
+  bool hasBITALG() const { return HasBITALG; }
   bool hasMPX() const { return HasMPX; }
   bool hasCLFLUSHOPT() const { return HasCLFLUSHOPT; }
   bool hasCLWB() const { return HasCLWB; }
Index: lib/Target/X86/X86Subtarget.cpp
===================================================================
--- lib/Target/X86/X86Subtarget.cpp
+++ lib/Target/X86/X86Subtarget.cpp
@@ -319,6 +319,7 @@
   HasVLX = false;
   HasADX = false;
   HasPKU = false;
+  HasBITALG = false;
   HasSHA = false;
   HasPRFCHW = false;
   HasRDSEED = false;
Index: test/CodeGen/X86/vector-popcnt-128.ll
===================================================================
--- 
test/CodeGen/X86/vector-popcnt-128.ll
+++ test/CodeGen/X86/vector-popcnt-128.ll
@@ -6,6 +6,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=BITALG_NOVLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
 
 define <2 x i64> @testv2i64(<2 x i64> %in) nounwind {
 ; SSE2-LABEL: testv2i64:
@@ -381,6 +383,19 @@
 ; AVX512VPOPCNTDQ-NEXT: vpmovqw %zmm0, %xmm0
 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
 ; AVX512VPOPCNTDQ-NEXT: retq
+;
+; BITALG_NOVLX-LABEL: testv8i16:
+; BITALG_NOVLX: # BB#0:
+; BITALG_NOVLX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
+; BITALG_NOVLX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; BITALG_NOVLX-NEXT: vzeroupper
+; BITALG_NOVLX-NEXT: retq
+;
+; BITALG-LABEL: testv8i16:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpopcntw %xmm0, %xmm0
+; BITALG-NEXT: retq
   %out = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> %in)
   ret <8 x i16> %out
 }
@@ -485,6 +500,19 @@
 ; AVX512VPOPCNTDQ-NEXT: vpmovdb %zmm0, %xmm0
 ; AVX512VPOPCNTDQ-NEXT: vzeroupper
 ; AVX512VPOPCNTDQ-NEXT: retq
+;
+; BITALG_NOVLX-LABEL: testv16i8:
+; BITALG_NOVLX: # BB#0:
+; BITALG_NOVLX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
+; BITALG_NOVLX-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
+; BITALG_NOVLX-NEXT: vzeroupper
+; BITALG_NOVLX-NEXT: retq
+;
+; BITALG-LABEL: testv16i8:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpopcntb %xmm0, %xmm0
+; BITALG-NEXT: retq
   %out = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> %in)
   ret <16 x i8> %out
 }
Index: test/CodeGen/X86/vector-popcnt-256.ll
===================================================================
--- test/CodeGen/X86/vector-popcnt-256.ll
+++ test/CodeGen/X86/vector-popcnt-256.ll
@@ -2,6 +2,8 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512VPOPCNTDQ
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=BITALG_NOVLX
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=BITALG
 
 define <4 x i64> @testv4i64(<4 x i64> %in) nounwind {
 ; AVX1-LABEL: testv4i64:
@@ -159,6 +161,18 @@
 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-NEXT: vpmovdw %zmm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT: retq
+;
+; BITALG_NOVLX-LABEL: testv16i16:
+; BITALG_NOVLX: # BB#0:
+; BITALG_NOVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; BITALG_NOVLX-NEXT: vpopcntw %zmm0, %zmm0
+; BITALG_NOVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; BITALG_NOVLX-NEXT: retq
+;
+; BITALG-LABEL: testv16i16:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpopcntw %ymm0, %ymm0
+; BITALG-NEXT: retq
   %out = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> %in)
   ret <16 x i16> %out
 }
@@ -207,6 +221,18 @@
 ; AVX512VPOPCNTDQ-NEXT: vpshufb %ymm0, %ymm3, %ymm0
 ; AVX512VPOPCNTDQ-NEXT: vpaddb %ymm2, %ymm0, %ymm0
 ; AVX512VPOPCNTDQ-NEXT: retq
+;
+; BITALG_NOVLX-LABEL: testv32i8:
+; BITALG_NOVLX: # BB#0:
+; BITALG_NOVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def>
+; BITALG_NOVLX-NEXT: vpopcntb %zmm0, %zmm0
+; BITALG_NOVLX-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill>
+; BITALG_NOVLX-NEXT: retq
+;
+; BITALG-LABEL: testv32i8:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpopcntb %ymm0, %ymm0
+; BITALG-NEXT: retq
   %out = call <32 x i8> @llvm.ctpop.v32i8(<32 x i8> %in)
   ret <32 x i8> %out
 }
Index: test/CodeGen/X86/vector-popcnt-512.ll
===================================================================
--- test/CodeGen/X86/vector-popcnt-512.ll
+++ test/CodeGen/X86/vector-popcnt-512.ll
@@ -3,6 +3,7 @@
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512BW
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VPOPCNTDQ --check-prefix=AVX512VPOPCNTDQ-NOBW
 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vpopcntdq,+avx512bw | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512VPOPCNTDQ --check-prefix=AVX512VPOPCNTDQ-BW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bitalg | FileCheck %s --check-prefix=AVX512 --check-prefix=BITALG
 
 define <8 x i64> @testv8i64(<8 x i64> %in) nounwind {
 ; AVX512F-LABEL: testv8i64:
@@ -172,6 +173,11 @@
 ; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT: vpsrlw $8, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT: retq
+;
+; BITALG-LABEL: testv32i16:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpopcntw %zmm0, %zmm0
+; BITALG-NEXT: retq
   %out = call <32 x i16> @llvm.ctpop.v32i16(<32 x i16> %in)
   ret <32 x i16> %out
 }
@@ -236,6 +242,11 @@
 ; AVX512VPOPCNTDQ-BW-NEXT: vpshufb %zmm0, %zmm3, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT: vpaddb %zmm2, %zmm0, %zmm0
 ; AVX512VPOPCNTDQ-BW-NEXT: retq
+;
+; BITALG-LABEL: testv64i8:
+; BITALG: # BB#0:
+; BITALG-NEXT: vpopcntb %zmm0, %zmm0
+; BITALG-NEXT: retq
   %out = call <64 x i8> @llvm.ctpop.v64i8(<64 x i8> %in)
   ret <64 x i8> %out
 }
Index: test/CodeGen/X86/vpshufbitqmb-intrinsics.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/vpshufbitqmb-intrinsics.ll
@@ -0,0 +1,41 @@
+; NOTE: Assertions have been autogenerated 
by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512bitalg,+avx512vl | FileCheck %s
+
+declare i16 @llvm.x86.avx512.mask.vpshufbitqmb.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
+define i16 @test_vpshufbitqmb_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
+; CHECK-LABEL: test_vpshufbitqmb_128:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpshufbitqmb %xmm1, %xmm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: ## kill: %AX<def> %AX<kill> %EAX<kill>
+; CHECK-NEXT: retq
+  %res = call i16 @llvm.x86.avx512.mask.vpshufbitqmb.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
+  ret i16 %res
+}
+
+declare i32 @llvm.x86.avx512.mask.vpshufbitqmb.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
+define i32 @test_vpshufbitqmb_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
+; CHECK-LABEL: test_vpshufbitqmb_256:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovd %edi, %k1
+; CHECK-NEXT: vpshufbitqmb %ymm1, %ymm0, %k0 {%k1}
+; CHECK-NEXT: kmovd %k0, %eax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+  %res = call i32 @llvm.x86.avx512.mask.vpshufbitqmb.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
+  ret i32 %res
+}
+
+declare i64 @llvm.x86.avx512.mask.vpshufbitqmb.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
+define i64 @test_vpshufbitqmb_512(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
+; CHECK-LABEL: test_vpshufbitqmb_512:
+; CHECK: ## BB#0:
+; CHECK-NEXT: kmovq %rdi, %k1
+; CHECK-NEXT: vpshufbitqmb %zmm1, %zmm0, %k0 {%k1}
+; CHECK-NEXT: kmovq %k0, %rax
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+  %res = call i64 @llvm.x86.avx512.mask.vpshufbitqmb.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
+  ret i64 %res
+}
Index: test/MC/X86/avx512bitalg-encoding.s
===================================================================
--- /dev/null
+++ test/MC/X86/avx512bitalg-encoding.s
@@ -0,0 +1,170 @@
+// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512bitalg --show-encoding < %s | FileCheck %s
+
+// CHECK: vpopcntb %zmm23, %zmm21
+// CHECK: encoding: 
[0x62,0xa2,0x7d,0x48,0x54,0xef] + vpopcntb %zmm23, %zmm21 + +// CHECK: vpopcntw %zmm23, %zmm21 +// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x54,0xef] + vpopcntw %zmm23, %zmm21 + +// CHECK: vpopcntb %zmm3, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x7d,0x4a,0x54,0xcb] + vpopcntb %zmm3, %zmm1 {%k2} + +// CHECK: vpopcntw %zmm3, %zmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0xfd,0x4a,0x54,0xcb] + vpopcntw %zmm3, %zmm1 {%k2} + +// CHECK: vpopcntb (%rcx), %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x54,0x09] + vpopcntb (%rcx), %zmm1 + +// CHECK: vpopcntb -256(%rsp), %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x54,0x4c,0x24,0xfc] + vpopcntb -256(%rsp), %zmm1 + +// CHECK: vpopcntb 256(%rsp), %zmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x48,0x54,0x4c,0x24,0x04] + vpopcntb 256(%rsp), %zmm1 + +// CHECK: vpopcntb 268435456(%rcx,%r14,8), %zmm1 +// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x54,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpopcntb 268435456(%rcx,%r14,8), %zmm1 + +// CHECK: vpopcntb -536870912(%rcx,%r14,8), %zmm1 +// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x54,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpopcntb -536870912(%rcx,%r14,8), %zmm1 + +// CHECK: vpopcntb -536870910(%rcx,%r14,8), %zmm1 +// CHECK: encoding: [0x62,0xb2,0x7d,0x48,0x54,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpopcntb -536870910(%rcx,%r14,8), %zmm1 + +// CHECK: vpopcntw (%rcx), %zmm1 +// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x54,0x09] + vpopcntw (%rcx), %zmm1 + +// CHECK: vpopcntw -256(%rsp), %zmm1 +// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x54,0x4c,0x24,0xfc] + vpopcntw -256(%rsp), %zmm1 + +// CHECK: vpopcntw 256(%rsp), %zmm1 +// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x54,0x4c,0x24,0x04] + vpopcntw 256(%rsp), %zmm1 + +// CHECK: vpopcntw 268435456(%rcx,%r14,8), %zmm1 +// CHECK: encoding: [0x62,0xb2,0xfd,0x48,0x54,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpopcntw 268435456(%rcx,%r14,8), %zmm1 + +// CHECK: vpopcntw -536870912(%rcx,%r14,8), %zmm1 +// CHECK: encoding: [0x62,0xb2,0xfd,0x48,0x54,0x8c,0xf1,0x00,0x00,0x00,0xe0] + 
vpopcntw -536870912(%rcx,%r14,8), %zmm1 + +// CHECK: vpopcntw -536870910(%rcx,%r14,8), %zmm1 +// CHECK: encoding: [0x62,0xb2,0xfd,0x48,0x54,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpopcntw -536870910(%rcx,%r14,8), %zmm1 + +// CHECK: vpopcntb (%rcx), %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x7d,0x4a,0x54,0x29] + vpopcntb (%rcx), %zmm21 {%k2} + +// CHECK: vpopcntb -256(%rsp), %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x7d,0x4a,0x54,0x6c,0x24,0xfc] + vpopcntb -256(%rsp), %zmm21 {%k2} + +// CHECK: vpopcntb 256(%rsp), %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x7d,0x4a,0x54,0x6c,0x24,0x04] + vpopcntb 256(%rsp), %zmm21 {%k2} + +// CHECK: vpopcntb 268435456(%rcx,%r14,8), %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x54,0xac,0xf1,0x00,0x00,0x00,0x10] + vpopcntb 268435456(%rcx,%r14,8), %zmm21 {%k2} + +// CHECK: vpopcntb -536870912(%rcx,%r14,8), %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x54,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpopcntb -536870912(%rcx,%r14,8), %zmm21 {%k2} + +// CHECK: vpopcntb -536870910(%rcx,%r14,8), %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7d,0x4a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpopcntb -536870910(%rcx,%r14,8), %zmm21 {%k2} + +// CHECK: vpopcntw (%rcx), %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0xfd,0x4a,0x54,0x29] + vpopcntw (%rcx), %zmm21 {%k2} + +// CHECK: vpopcntw -256(%rsp), %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0xfd,0x4a,0x54,0x6c,0x24,0xfc] + vpopcntw -256(%rsp), %zmm21 {%k2} + +// CHECK: vpopcntw 256(%rsp), %zmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0xfd,0x4a,0x54,0x6c,0x24,0x04] + vpopcntw 256(%rsp), %zmm21 {%k2} + +// CHECK: vpopcntw 268435456(%rcx,%r14,8), %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x54,0xac,0xf1,0x00,0x00,0x00,0x10] + vpopcntw 268435456(%rcx,%r14,8), %zmm21 {%k2} + +// CHECK: vpopcntw -536870912(%rcx,%r14,8), %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x54,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpopcntw -536870912(%rcx,%r14,8), %zmm21 {%k2} + +// CHECK: 
vpopcntw -536870910(%rcx,%r14,8), %zmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0xfd,0x4a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpopcntw -536870910(%rcx,%r14,8), %zmm21 {%k2} + +// CHECK: vpshufbitqmb %zmm2, %zmm23, %k1 +// CHECK: encoding: [0x62,0xf2,0x45,0x40,0x8f,0xca] + vpshufbitqmb %zmm2, %zmm23, %k1 + +// CHECK: vpshufbitqmb %zmm2, %zmm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x45,0x42,0x8f,0xca] + vpshufbitqmb %zmm2, %zmm23, %k1 {%k2} + +// CHECK: vpshufbitqmb (%rcx), %zmm23, %k1 +// CHECK: encoding: [0x62,0xf2,0x45,0x40,0x8f,0x09] + vpshufbitqmb (%rcx), %zmm23, %k1 + +// CHECK: vpshufbitqmb -256(%rsp), %zmm23, %k1 +// CHECK: encoding: [0x62,0xf2,0x45,0x40,0x8f,0x4c,0x24,0xfc] + vpshufbitqmb -256(%rsp), %zmm23, %k1 + +// CHECK: vpshufbitqmb 256(%rsp), %zmm23, %k1 +// CHECK: encoding: [0x62,0xf2,0x45,0x40,0x8f,0x4c,0x24,0x04] + vpshufbitqmb 256(%rsp), %zmm23, %k1 + +// CHECK: vpshufbitqmb 268435456(%rcx,%r14,8), %zmm23, %k1 +// CHECK: encoding: [0x62,0xb2,0x45,0x40,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpshufbitqmb 268435456(%rcx,%r14,8), %zmm23, %k1 + +// CHECK: vpshufbitqmb -536870912(%rcx,%r14,8), %zmm23, %k1 +// CHECK: encoding: [0x62,0xb2,0x45,0x40,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpshufbitqmb -536870912(%rcx,%r14,8), %zmm23, %k1 + +// CHECK: vpshufbitqmb -536870910(%rcx,%r14,8), %zmm23, %k1 +// CHECK: encoding: [0x62,0xb2,0x45,0x40,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpshufbitqmb -536870910(%rcx,%r14,8), %zmm23, %k1 + +// CHECK: vpshufbitqmb (%rcx), %zmm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x45,0x42,0x8f,0x09] + vpshufbitqmb (%rcx), %zmm23, %k1 {%k2} + +// CHECK: vpshufbitqmb -256(%rsp), %zmm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x45,0x42,0x8f,0x4c,0x24,0xfc] + vpshufbitqmb -256(%rsp), %zmm23, %k1 {%k2} + +// CHECK: vpshufbitqmb 256(%rsp), %zmm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x45,0x42,0x8f,0x4c,0x24,0x04] + vpshufbitqmb 256(%rsp), %zmm23, %k1 {%k2} + +// CHECK: vpshufbitqmb 268435456(%rcx,%r14,8), %zmm23, %k1 
{%k2} +// CHECK: encoding: [0x62,0xb2,0x45,0x42,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpshufbitqmb 268435456(%rcx,%r14,8), %zmm23, %k1 {%k2} + +// CHECK: vpshufbitqmb -536870912(%rcx,%r14,8), %zmm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x45,0x42,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpshufbitqmb -536870912(%rcx,%r14,8), %zmm23, %k1 {%k2} + +// CHECK: vpshufbitqmb -536870910(%rcx,%r14,8), %zmm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x45,0x42,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpshufbitqmb -536870910(%rcx,%r14,8), %zmm23, %k1 {%k2} + Index: test/MC/X86/avx512vl_bitalg-encoding.s =================================================================== --- /dev/null +++ test/MC/X86/avx512vl_bitalg-encoding.s @@ -0,0 +1,338 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512vl,+avx512bitalg --show-encoding < %s | FileCheck %s + +// CHECK: vpopcntb %xmm23, %xmm21 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x54,0xef] + vpopcntb %xmm23, %xmm21 + +// CHECK: vpopcntw %xmm23, %xmm21 +// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x54,0xef] + vpopcntw %xmm23, %xmm21 + +// CHECK: vpopcntb %xmm3, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x7d,0x0a,0x54,0xcb] + vpopcntb %xmm3, %xmm1 {%k2} + +// CHECK: vpopcntw %xmm3, %xmm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0xfd,0x0a,0x54,0xcb] + vpopcntw %xmm3, %xmm1 {%k2} + +// CHECK: vpopcntb (%rcx), %xmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x54,0x09] + vpopcntb (%rcx), %xmm1 + +// CHECK: vpopcntb -64(%rsp), %xmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x54,0x4c,0x24,0xfc] + vpopcntb -64(%rsp), %xmm1 + +// CHECK: vpopcntb 64(%rsp), %xmm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x08,0x54,0x4c,0x24,0x04] + vpopcntb 64(%rsp), %xmm1 + +// CHECK: vpopcntb 268435456(%rcx,%r14,8), %xmm1 +// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x54,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpopcntb 268435456(%rcx,%r14,8), %xmm1 + +// CHECK: vpopcntb -536870912(%rcx,%r14,8), %xmm1 +// CHECK: encoding: 
[0x62,0xb2,0x7d,0x08,0x54,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpopcntb -536870912(%rcx,%r14,8), %xmm1 + +// CHECK: vpopcntb -536870910(%rcx,%r14,8), %xmm1 +// CHECK: encoding: [0x62,0xb2,0x7d,0x08,0x54,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpopcntb -536870910(%rcx,%r14,8), %xmm1 + +// CHECK: vpopcntw (%rcx), %xmm1 +// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x54,0x09] + vpopcntw (%rcx), %xmm1 + +// CHECK: vpopcntw -64(%rsp), %xmm1 +// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x54,0x4c,0x24,0xfc] + vpopcntw -64(%rsp), %xmm1 + +// CHECK: vpopcntw 64(%rsp), %xmm1 +// CHECK: encoding: [0x62,0xf2,0xfd,0x08,0x54,0x4c,0x24,0x04] + vpopcntw 64(%rsp), %xmm1 + +// CHECK: vpopcntw 268435456(%rcx,%r14,8), %xmm1 +// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x54,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpopcntw 268435456(%rcx,%r14,8), %xmm1 + +// CHECK: vpopcntw -536870912(%rcx,%r14,8), %xmm1 +// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x54,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpopcntw -536870912(%rcx,%r14,8), %xmm1 + +// CHECK: vpopcntw -536870910(%rcx,%r14,8), %xmm1 +// CHECK: encoding: [0x62,0xb2,0xfd,0x08,0x54,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpopcntw -536870910(%rcx,%r14,8), %xmm1 + +// CHECK: vpopcntb (%rcx), %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x54,0x29] + vpopcntb (%rcx), %xmm21 {%k2} + +// CHECK: vpopcntb -64(%rsp), %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x54,0x6c,0x24,0xfc] + vpopcntb -64(%rsp), %xmm21 {%k2} + +// CHECK: vpopcntb 64(%rsp), %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x7d,0x0a,0x54,0x6c,0x24,0x04] + vpopcntb 64(%rsp), %xmm21 {%k2} + +// CHECK: vpopcntb 268435456(%rcx,%r14,8), %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x54,0xac,0xf1,0x00,0x00,0x00,0x10] + vpopcntb 268435456(%rcx,%r14,8), %xmm21 {%k2} + +// CHECK: vpopcntb -536870912(%rcx,%r14,8), %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x54,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpopcntb -536870912(%rcx,%r14,8), %xmm21 {%k2} + +// CHECK: vpopcntb -536870910(%rcx,%r14,8), 
%xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpopcntb -536870910(%rcx,%r14,8), %xmm21 {%k2} + +// CHECK: vpopcntw (%rcx), %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x54,0x29] + vpopcntw (%rcx), %xmm21 {%k2} + +// CHECK: vpopcntw -64(%rsp), %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x54,0x6c,0x24,0xfc] + vpopcntw -64(%rsp), %xmm21 {%k2} + +// CHECK: vpopcntw 64(%rsp), %xmm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x54,0x6c,0x24,0x04] + vpopcntw 64(%rsp), %xmm21 {%k2} + +// CHECK: vpopcntw 268435456(%rcx,%r14,8), %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x54,0xac,0xf1,0x00,0x00,0x00,0x10] + vpopcntw 268435456(%rcx,%r14,8), %xmm21 {%k2} + +// CHECK: vpopcntw -536870912(%rcx,%r14,8), %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x54,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpopcntw -536870912(%rcx,%r14,8), %xmm21 {%k2} + +// CHECK: vpopcntw -536870910(%rcx,%r14,8), %xmm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0xfd,0x0a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpopcntw -536870910(%rcx,%r14,8), %xmm21 {%k2} + +// CHECK: vpopcntb %ymm23, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x54,0xef] + vpopcntb %ymm23, %ymm21 + +// CHECK: vpopcntw %ymm23, %ymm21 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x54,0xef] + vpopcntw %ymm23, %ymm21 + +// CHECK: vpopcntb %ymm3, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x7d,0x2a,0x54,0xcb] + vpopcntb %ymm3, %ymm1 {%k2} + +// CHECK: vpopcntw %ymm3, %ymm1 {%k2} +// CHECK: encoding: [0x62,0xf2,0xfd,0x2a,0x54,0xcb] + vpopcntw %ymm3, %ymm1 {%k2} + +// CHECK: vpopcntb (%rcx), %ymm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x54,0x09] + vpopcntb (%rcx), %ymm1 + +// CHECK: vpopcntb -128(%rsp), %ymm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x54,0x4c,0x24,0xfc] + vpopcntb -128(%rsp), %ymm1 + +// CHECK: vpopcntb 128(%rsp), %ymm1 +// CHECK: encoding: [0x62,0xf2,0x7d,0x28,0x54,0x4c,0x24,0x04] + vpopcntb 128(%rsp), %ymm1 + +// CHECK: vpopcntb 
268435456(%rcx,%r14,8), %ymm1 +// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x54,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpopcntb 268435456(%rcx,%r14,8), %ymm1 + +// CHECK: vpopcntb -536870912(%rcx,%r14,8), %ymm1 +// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x54,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpopcntb -536870912(%rcx,%r14,8), %ymm1 + +// CHECK: vpopcntb -536870910(%rcx,%r14,8), %ymm1 +// CHECK: encoding: [0x62,0xb2,0x7d,0x28,0x54,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpopcntb -536870910(%rcx,%r14,8), %ymm1 + +// CHECK: vpopcntw (%rcx), %ymm1 +// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x54,0x09] + vpopcntw (%rcx), %ymm1 + +// CHECK: vpopcntw -128(%rsp), %ymm1 +// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x54,0x4c,0x24,0xfc] + vpopcntw -128(%rsp), %ymm1 + +// CHECK: vpopcntw 128(%rsp), %ymm1 +// CHECK: encoding: [0x62,0xf2,0xfd,0x28,0x54,0x4c,0x24,0x04] + vpopcntw 128(%rsp), %ymm1 + +// CHECK: vpopcntw 268435456(%rcx,%r14,8), %ymm1 +// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x54,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpopcntw 268435456(%rcx,%r14,8), %ymm1 + +// CHECK: vpopcntw -536870912(%rcx,%r14,8), %ymm1 +// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x54,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpopcntw -536870912(%rcx,%r14,8), %ymm1 + +// CHECK: vpopcntw -536870910(%rcx,%r14,8), %ymm1 +// CHECK: encoding: [0x62,0xb2,0xfd,0x28,0x54,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpopcntw -536870910(%rcx,%r14,8), %ymm1 + +// CHECK: vpopcntb (%rcx), %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x54,0x29] + vpopcntb (%rcx), %ymm21 {%k2} + +// CHECK: vpopcntb -128(%rsp), %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x54,0x6c,0x24,0xfc] + vpopcntb -128(%rsp), %ymm21 {%k2} + +// CHECK: vpopcntb 128(%rsp), %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0x7d,0x2a,0x54,0x6c,0x24,0x04] + vpopcntb 128(%rsp), %ymm21 {%k2} + +// CHECK: vpopcntb 268435456(%rcx,%r14,8), %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x54,0xac,0xf1,0x00,0x00,0x00,0x10] + vpopcntb 268435456(%rcx,%r14,8), %ymm21 {%k2} + +// 
CHECK: vpopcntb -536870912(%rcx,%r14,8), %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x54,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpopcntb -536870912(%rcx,%r14,8), %ymm21 {%k2} + +// CHECK: vpopcntb -536870910(%rcx,%r14,8), %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7d,0x2a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpopcntb -536870910(%rcx,%r14,8), %ymm21 {%k2} + +// CHECK: vpopcntw (%rcx), %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x54,0x29] + vpopcntw (%rcx), %ymm21 {%k2} + +// CHECK: vpopcntw -128(%rsp), %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x54,0x6c,0x24,0xfc] + vpopcntw -128(%rsp), %ymm21 {%k2} + +// CHECK: vpopcntw 128(%rsp), %ymm21 {%k2} +// CHECK: encoding: [0x62,0xe2,0xfd,0x2a,0x54,0x6c,0x24,0x04] + vpopcntw 128(%rsp), %ymm21 {%k2} + +// CHECK: vpopcntw 268435456(%rcx,%r14,8), %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x54,0xac,0xf1,0x00,0x00,0x00,0x10] + vpopcntw 268435456(%rcx,%r14,8), %ymm21 {%k2} + +// CHECK: vpopcntw -536870912(%rcx,%r14,8), %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x54,0xac,0xf1,0x00,0x00,0x00,0xe0] + vpopcntw -536870912(%rcx,%r14,8), %ymm21 {%k2} + +// CHECK: vpopcntw -536870910(%rcx,%r14,8), %ymm21 {%k2} +// CHECK: encoding: [0x62,0xa2,0xfd,0x2a,0x54,0xac,0xf1,0x02,0x00,0x00,0xe0] + vpopcntw -536870910(%rcx,%r14,8), %ymm21 {%k2} + +// CHECK: vpshufbitqmb %xmm2, %xmm23, %k1 +// CHECK: encoding: [0x62,0xf2,0x45,0x00,0x8f,0xca] + vpshufbitqmb %xmm2, %xmm23, %k1 + +// CHECK: vpshufbitqmb %xmm2, %xmm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x45,0x02,0x8f,0xca] + vpshufbitqmb %xmm2, %xmm23, %k1 {%k2} + +// CHECK: vpshufbitqmb (%rcx), %xmm23, %k1 +// CHECK: encoding: [0x62,0xf2,0x45,0x00,0x8f,0x09] + vpshufbitqmb (%rcx), %xmm23, %k1 + +// CHECK: vpshufbitqmb -64(%rsp), %xmm23, %k1 +// CHECK: encoding: [0x62,0xf2,0x45,0x00,0x8f,0x4c,0x24,0xfc] + vpshufbitqmb -64(%rsp), %xmm23, %k1 + +// CHECK: vpshufbitqmb 64(%rsp), %xmm23, %k1 +// CHECK: encoding: 
[0x62,0xf2,0x45,0x00,0x8f,0x4c,0x24,0x04] + vpshufbitqmb 64(%rsp), %xmm23, %k1 + +// CHECK: vpshufbitqmb 268435456(%rcx,%r14,8), %xmm23, %k1 +// CHECK: encoding: [0x62,0xb2,0x45,0x00,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpshufbitqmb 268435456(%rcx,%r14,8), %xmm23, %k1 + +// CHECK: vpshufbitqmb -536870912(%rcx,%r14,8), %xmm23, %k1 +// CHECK: encoding: [0x62,0xb2,0x45,0x00,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpshufbitqmb -536870912(%rcx,%r14,8), %xmm23, %k1 + +// CHECK: vpshufbitqmb -536870910(%rcx,%r14,8), %xmm23, %k1 +// CHECK: encoding: [0x62,0xb2,0x45,0x00,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpshufbitqmb -536870910(%rcx,%r14,8), %xmm23, %k1 + +// CHECK: vpshufbitqmb (%rcx), %xmm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x45,0x02,0x8f,0x09] + vpshufbitqmb (%rcx), %xmm23, %k1 {%k2} + +// CHECK: vpshufbitqmb -64(%rsp), %xmm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x45,0x02,0x8f,0x4c,0x24,0xfc] + vpshufbitqmb -64(%rsp), %xmm23, %k1 {%k2} + +// CHECK: vpshufbitqmb 64(%rsp), %xmm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x45,0x02,0x8f,0x4c,0x24,0x04] + vpshufbitqmb 64(%rsp), %xmm23, %k1 {%k2} + +// CHECK: vpshufbitqmb 268435456(%rcx,%r14,8), %xmm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x45,0x02,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpshufbitqmb 268435456(%rcx,%r14,8), %xmm23, %k1 {%k2} + +// CHECK: vpshufbitqmb -536870912(%rcx,%r14,8), %xmm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x45,0x02,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpshufbitqmb -536870912(%rcx,%r14,8), %xmm23, %k1 {%k2} + +// CHECK: vpshufbitqmb -536870910(%rcx,%r14,8), %xmm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x45,0x02,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpshufbitqmb -536870910(%rcx,%r14,8), %xmm23, %k1 {%k2} + +// CHECK: vpshufbitqmb %ymm2, %ymm23, %k1 +// CHECK: encoding: [0x62,0xf2,0x45,0x20,0x8f,0xca] + vpshufbitqmb %ymm2, %ymm23, %k1 + +// CHECK: vpshufbitqmb %ymm2, %ymm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x45,0x22,0x8f,0xca] + vpshufbitqmb %ymm2, 
%ymm23, %k1 {%k2} + +// CHECK: vpshufbitqmb (%rcx), %ymm23, %k1 +// CHECK: encoding: [0x62,0xf2,0x45,0x20,0x8f,0x09] + vpshufbitqmb (%rcx), %ymm23, %k1 + +// CHECK: vpshufbitqmb -128(%rsp), %ymm23, %k1 +// CHECK: encoding: [0x62,0xf2,0x45,0x20,0x8f,0x4c,0x24,0xfc] + vpshufbitqmb -128(%rsp), %ymm23, %k1 + +// CHECK: vpshufbitqmb 128(%rsp), %ymm23, %k1 +// CHECK: encoding: [0x62,0xf2,0x45,0x20,0x8f,0x4c,0x24,0x04] + vpshufbitqmb 128(%rsp), %ymm23, %k1 + +// CHECK: vpshufbitqmb 268435456(%rcx,%r14,8), %ymm23, %k1 +// CHECK: encoding: [0x62,0xb2,0x45,0x20,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpshufbitqmb 268435456(%rcx,%r14,8), %ymm23, %k1 + +// CHECK: vpshufbitqmb -536870912(%rcx,%r14,8), %ymm23, %k1 +// CHECK: encoding: [0x62,0xb2,0x45,0x20,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpshufbitqmb -536870912(%rcx,%r14,8), %ymm23, %k1 + +// CHECK: vpshufbitqmb -536870910(%rcx,%r14,8), %ymm23, %k1 +// CHECK: encoding: [0x62,0xb2,0x45,0x20,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpshufbitqmb -536870910(%rcx,%r14,8), %ymm23, %k1 + +// CHECK: vpshufbitqmb (%rcx), %ymm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x45,0x22,0x8f,0x09] + vpshufbitqmb (%rcx), %ymm23, %k1 {%k2} + +// CHECK: vpshufbitqmb -128(%rsp), %ymm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x45,0x22,0x8f,0x4c,0x24,0xfc] + vpshufbitqmb -128(%rsp), %ymm23, %k1 {%k2} + +// CHECK: vpshufbitqmb 128(%rsp), %ymm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xf2,0x45,0x22,0x8f,0x4c,0x24,0x04] + vpshufbitqmb 128(%rsp), %ymm23, %k1 {%k2} + +// CHECK: vpshufbitqmb 268435456(%rcx,%r14,8), %ymm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x45,0x22,0x8f,0x8c,0xf1,0x00,0x00,0x00,0x10] + vpshufbitqmb 268435456(%rcx,%r14,8), %ymm23, %k1 {%k2} + +// CHECK: vpshufbitqmb -536870912(%rcx,%r14,8), %ymm23, %k1 {%k2} +// CHECK: encoding: [0x62,0xb2,0x45,0x22,0x8f,0x8c,0xf1,0x00,0x00,0x00,0xe0] + vpshufbitqmb -536870912(%rcx,%r14,8), %ymm23, %k1 {%k2} + +// CHECK: vpshufbitqmb -536870910(%rcx,%r14,8), %ymm23, %k1 {%k2} +// CHECK: 
encoding: [0x62,0xb2,0x45,0x22,0x8f,0x8c,0xf1,0x02,0x00,0x00,0xe0] + vpshufbitqmb -536870910(%rcx,%r14,8), %ymm23, %k1 {%k2} +