Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -5094,41 +5094,51 @@ // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// -multiclass avx512_logic_rm opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _, bit IsCommutable = 0> { +// OpNodeMsk is the OpNode to use when element size is important. OpNode will +// be set to null_frag for 32-bit elements. +multiclass avx512_logic_rm opc, string OpcodeStr, + SDPatternOperator OpNode, + SDNode OpNodeMsk, X86VectorVTInfo _, + bit IsCommutable = 0> { + let hasSideEffects = 0 in defm rr : AVX512_maskable_logic, AVX512BIBase, EVEX_4V; + let hasSideEffects = 0, mayLoad = 1 in defm rm : AVX512_maskable_logic, AVX512BIBase, EVEX_4V; } -multiclass avx512_logic_rmb opc, string OpcodeStr, SDNode OpNode, - X86VectorVTInfo _, bit IsCommutable = 0> : - avx512_logic_rm { +// OpNodeMsk is the OpNode to use where element size is important. So use +// for all of the broadcast patterns. +multiclass avx512_logic_rmb opc, string OpcodeStr, + SDPatternOperator OpNode, + SDNode OpNodeMsk, X86VectorVTInfo _, + bit IsCommutable = 0> : + avx512_logic_rm { defm rmb : AVX512_maskable_logic opc, string OpcodeStr, SDNode OpNode, - AVX512VLVectorVTInfo VTInfo, +multiclass avx512_logic_rmb_vl opc, string OpcodeStr, + SDPatternOperator OpNode, + SDNode OpNodeMsk, AVX512VLVectorVTInfo VTInfo, bit IsCommutable = 0> { let Predicates = [HasAVX512] in - defm Z : avx512_logic_rmb, EVEX_V512; let Predicates = [HasAVX512, HasVLX] in { - defm Z256 : avx512_logic_rmb, EVEX_V256; - defm Z128 : avx512_logic_rmb, EVEX_V128; + defm Z256 : avx512_logic_rmb, EVEX_V256; + defm Z128 : avx512_logic_rmb, EVEX_V128; } } -multiclass avx512_logic_rm_vl_d opc, string OpcodeStr, SDNode OpNode, - bit IsCommutable = 0> { - defm NAME : avx512_logic_rmb_vl, EVEX_CD8<32, CD8VF>; -} - -multiclass avx512_logic_rm_vl_q opc, string OpcodeStr, SDNode OpNode, - bit IsCommutable = 0> { - defm NAME : avx512_logic_rmb_vl, - VEX_W, EVEX_CD8<64, CD8VF>; -} - multiclass avx512_logic_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, SDNode OpNode, bit IsCommutable = 0> { - defm Q : avx512_logic_rm_vl_q; - defm D : avx512_logic_rm_vl_d; + defm Q : avx512_logic_rmb_vl, + VEX_W, EVEX_CD8<64, CD8VF>; + defm D : avx512_logic_rmb_vl, + EVEX_CD8<32, CD8VF>; } defm VPAND : avx512_logic_rm_vl_dq<0xDB, 0xDB, "vpand", and, 1>; Index: llvm/trunk/test/CodeGen/X86/avx512-arith.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-arith.ll +++ llvm/trunk/test/CodeGen/X86/avx512-arith.ll @@ -607,17 +607,17 @@ define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { ; AVX512F-LABEL: andd512fold: ; AVX512F: # BB#0: # %entry -; AVX512F-NEXT: vpandd (%rdi), %zmm0, %zmm0 +; AVX512F-NEXT: vpandq (%rdi), %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: andd512fold: ; AVX512VL: # BB#0: # %entry -; AVX512VL-NEXT: vpandd (%rdi), %zmm0, %zmm0 +; AVX512VL-NEXT: vpandq (%rdi), %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: andd512fold: ; AVX512BW: # BB#0: # %entry -; AVX512BW-NEXT: vpandd (%rdi), %zmm0, %zmm0 +; AVX512BW-NEXT: vpandq (%rdi), %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512DQ-LABEL: andd512fold: Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics-upgrade.ll @@ -959,7 +959,7 @@ define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: test_xor_epi32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpxord %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) ret < 16 x i32> %res @@ -981,7 +981,7 @@ define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: test_or_epi32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpord %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vporq %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) ret < 16 x i32> %res @@ -1003,7 +1003,7 @@ define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) { ; CHECK-LABEL: test_and_epi32: ; CHECK: ## BB#0: -; CHECK-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a,<16 x i32> %b, <16 x i32>zeroinitializer, i16 -1) ret < 16 x i32> %res Index: llvm/trunk/test/CodeGen/X86/avx512-logic.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-logic.ll +++ llvm/trunk/test/CodeGen/X86/avx512-logic.ll @@ -7,7 +7,7 @@ ; ALL-LABEL: vpandd: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; ALL-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; ALL-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; ALL-NEXT: retq entry: ; Force the execution domain with an add. @@ -21,7 +21,7 @@ ; ALL-LABEL: vpandnd: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; ALL-NEXT: vpandnd %zmm0, %zmm1, %zmm0 +; ALL-NEXT: vpandnq %zmm0, %zmm1, %zmm0 ; ALL-NEXT: retq entry: ; Force the execution domain with an add. @@ -37,7 +37,7 @@ ; ALL-LABEL: vpord: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; ALL-NEXT: vpord %zmm1, %zmm0, %zmm0 +; ALL-NEXT: vporq %zmm1, %zmm0, %zmm0 ; ALL-NEXT: retq entry: ; Force the execution domain with an add. @@ -51,7 +51,7 @@ ; ALL-LABEL: vpxord: ; ALL: ## BB#0: ## %entry ; ALL-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0 -; ALL-NEXT: vpxord %zmm1, %zmm0, %zmm0 +; ALL-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; ALL-NEXT: retq entry: ; Force the execution domain with an add. @@ -132,7 +132,7 @@ define <16 x i32> @andd512fold(<16 x i32> %y, <16 x i32>* %x) { ; KNL-LABEL: andd512fold: ; KNL: ## BB#0: ## %entry -; KNL-NEXT: vpandd (%rdi), %zmm0, %zmm0 +; KNL-NEXT: vpandq (%rdi), %zmm0, %zmm0 ; KNL-NEXT: retq ; ; SKX-LABEL: andd512fold: Index: llvm/trunk/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll +++ llvm/trunk/test/CodeGen/X86/broadcast-elm-cross-splat-vec.ll @@ -1335,7 +1335,7 @@ ; AVX512-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; AVX512-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: retl ; ; AVX-64-LABEL: f16xi32_i128: @@ -1369,7 +1369,7 @@ ; AVX512F-64-NEXT: vbroadcasti32x4 {{.*#+}} zmm1 = [0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; AVX512F-64-NEXT: # zmm1 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; AVX512F-64-NEXT: vpaddd %zmm1, %zmm0, %zmm0 -; AVX512F-64-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512F-64-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512F-64-NEXT: retq %res1 = add <16 x i32> , %a %res2 = and <16 x i32> , %res1 Index: llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll +++ llvm/trunk/test/CodeGen/X86/vector-bitreverse.ll @@ -2051,17 +2051,17 @@ ; AVX512F-NEXT: vpslld $4, %zmm1, %zmm1 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512F-NEXT: vpsrld $4, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1 ; AVX512F-NEXT: vpslld $2, %zmm1, %zmm1 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512F-NEXT: vpsrld $2, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1 ; AVX512F-NEXT: vpslld $1, %zmm1, %zmm1 ; AVX512F-NEXT: vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm0 ; AVX512F-NEXT: vpsrld $1, %zmm0, %zmm0 -; AVX512F-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512F-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: retq ; ; AVX512BW-LABEL: test_bitreverse_v16i32: Index: llvm/trunk/test/CodeGen/X86/vector-lzcnt-512.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-lzcnt-512.ll +++ llvm/trunk/test/CodeGen/X86/vector-lzcnt-512.ll @@ -176,7 +176,7 @@ ; AVX512BW-LABEL: testv16i32: ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1 -; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1 @@ -206,7 +206,7 @@ ; AVX512DQ-LABEL: testv16i32: ; AVX512DQ: # BB#0: ; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1 @@ -263,7 +263,7 @@ ; AVX512BW-LABEL: testv16i32u: ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpsrld $1, %zmm0, %zmm1 -; AVX512BW-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrld $2, %zmm0, %zmm1 ; AVX512BW-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpsrld $4, %zmm0, %zmm1 @@ -293,7 +293,7 @@ ; AVX512DQ-LABEL: testv16i32u: ; AVX512DQ: # BB#0: ; AVX512DQ-NEXT: vpsrld $1, %zmm0, %zmm1 -; AVX512DQ-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrld $2, %zmm0, %zmm1 ; AVX512DQ-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512DQ-NEXT: vpsrld $4, %zmm0, %zmm1 Index: llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll +++ llvm/trunk/test/CodeGen/X86/vector-rotate-512.ll @@ -696,7 +696,7 @@ ; AVX512-LABEL: splatconstant_rotate_mask_v16i32: ; AVX512: # BB#0: ; AVX512-NEXT: vprold $4, %zmm0, %zmm0 -; AVX512-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 +; AVX512-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; AVX512-NEXT: retq %shl = shl <16 x i32> %a, %lshr = lshr <16 x i32> %a, Index: llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll +++ llvm/trunk/test/CodeGen/X86/vector-trunc-math.ll @@ -3116,7 +3116,7 @@ ; ; AVX512-LABEL: trunc_and_v16i32_v16i8: ; AVX512: # BB#0: -; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -3830,7 +3830,7 @@ ; ; AVX512-LABEL: trunc_xor_v16i32_v16i8: ; AVX512: # BB#0: -; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vpxorq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -4544,7 +4544,7 @@ ; ; AVX512-LABEL: trunc_or_v16i32_v16i8: ; AVX512: # BB#0: -; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0 +; AVX512-NEXT: vporq %zmm1, %zmm0, %zmm0 ; AVX512-NEXT: vpmovdb %zmm0, %xmm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq Index: llvm/trunk/test/CodeGen/X86/vector-tzcnt-512.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/vector-tzcnt-512.ll +++ llvm/trunk/test/CodeGen/X86/vector-tzcnt-512.ll @@ -139,7 +139,7 @@ ; AVX512CD: # BB#0: ; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1 -; AVX512CD-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512CD-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512CD-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; AVX512CD-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512CD-NEXT: vextracti64x4 $1, %zmm0, %ymm1 @@ -175,7 +175,7 @@ ; AVX512CDBW: # BB#0: ; AVX512CDBW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm2 -; AVX512CDBW-NEXT: vpandd %zmm2, %zmm0, %zmm0 +; AVX512CDBW-NEXT: vpandq %zmm2, %zmm0, %zmm0 ; AVX512CDBW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 ; AVX512CDBW-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ; AVX512CDBW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -197,7 +197,7 @@ ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2 -; AVX512BW-NEXT: vpandd %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 ; AVX512BW-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -219,7 +219,7 @@ ; AVX512VPOPCNTDQ: # BB#0: ; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VPOPCNTDQ-NEXT: vpsubd %zmm0, %zmm1, %zmm1 -; AVX512VPOPCNTDQ-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; AVX512VPOPCNTDQ-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0 @@ -233,7 +233,7 @@ ; AVX512CD: # BB#0: ; AVX512CD-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm1 -; AVX512CD-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512CD-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512CD-NEXT: vplzcntd %zmm0, %zmm0 ; AVX512CD-NEXT: vpbroadcastd {{.*#+}} zmm1 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31] ; AVX512CD-NEXT: vpsubd %zmm0, %zmm1, %zmm0 @@ -243,7 +243,7 @@ ; AVX512CDBW: # BB#0: ; AVX512CDBW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm1 -; AVX512CDBW-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512CDBW-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512CDBW-NEXT: vplzcntd %zmm0, %zmm0 ; AVX512CDBW-NEXT: vpbroadcastd {{.*#+}} zmm1 = [31,31,31,31,31,31,31,31,31,31,31,31,31,31,31,31] ; AVX512CDBW-NEXT: vpsubd %zmm0, %zmm1, %zmm0 @@ -253,7 +253,7 @@ ; AVX512BW: # BB#0: ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpsubd %zmm0, %zmm1, %zmm2 -; AVX512BW-NEXT: vpandd %zmm2, %zmm0, %zmm0 +; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 ; AVX512BW-NEXT: vpaddd %zmm2, %zmm0, %zmm0 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] @@ -275,7 +275,7 @@ ; AVX512VPOPCNTDQ: # BB#0: ; AVX512VPOPCNTDQ-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VPOPCNTDQ-NEXT: vpsubd %zmm0, %zmm1, %zmm1 -; AVX512VPOPCNTDQ-NEXT: vpandd %zmm1, %zmm0, %zmm0 +; AVX512VPOPCNTDQ-NEXT: vpandq %zmm1, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 ; AVX512VPOPCNTDQ-NEXT: vpaddd %zmm1, %zmm0, %zmm0 ; AVX512VPOPCNTDQ-NEXT: vpopcntd %zmm0, %zmm0