Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -5562,26 +5562,70 @@ } // AVX-512 conflict detection +// count the number of leading zero bits let TargetPrefix = "x86" in { + def int_x86_avx512_mask_conflict_d_128 : + GCCBuiltin<"__builtin_ia32_vpconflictsi_128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_conflict_d_256 : + GCCBuiltin<"__builtin_ia32_vpconflictsi_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; def int_x86_avx512_mask_conflict_d_512 : GCCBuiltin<"__builtin_ia32_vpconflictsi_512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, - llvm_v16i32_ty, llvm_i16_ty], + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_conflict_q_128 : + GCCBuiltin<"__builtin_ia32_vpconflictdi_128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_conflict_q_256 : + GCCBuiltin<"__builtin_ia32_vpconflictdi_256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_conflict_q_512 : GCCBuiltin<"__builtin_ia32_vpconflictdi_512_mask">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_v8i64_ty, llvm_i8_ty], + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_lzcnt_d_128 : + GCCBuiltin<"__builtin_ia32_vplzcntd_128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_lzcnt_d_256 : + GCCBuiltin<"__builtin_ia32_vplzcntd_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_lzcnt_d_512 : GCCBuiltin<"__builtin_ia32_vplzcntd_512_mask">, - Intrinsic<[llvm_v16i32_ty], [llvm_v16i32_ty, - llvm_v16i32_ty, llvm_i16_ty], + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_lzcnt_q_128 : + GCCBuiltin<"__builtin_ia32_vplzcntq_128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_lzcnt_q_256 : + GCCBuiltin<"__builtin_ia32_vplzcntq_256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_lzcnt_q_512 : GCCBuiltin<"__builtin_ia32_vplzcntq_512_mask">, - Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, - llvm_v8i64_ty, llvm_i8_ty], + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -236,6 +236,9 @@ // Integer absolute value ABS, + // Detect Conflicts Within a Vector + CONFLICT, + /// Floating point max and min. FMAX, FMIN, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -1481,6 +1481,12 @@ setOperationAction(ISD::CTLZ, MVT::v8i64, Legal); setOperationAction(ISD::CTLZ, MVT::v16i32, Legal); } + if (Subtarget->hasVLX() && Subtarget->hasCDI()) { + setOperationAction(ISD::CTLZ, MVT::v4i64, Legal); + setOperationAction(ISD::CTLZ, MVT::v8i32, Legal); + setOperationAction(ISD::CTLZ, MVT::v2i64, Legal); + setOperationAction(ISD::CTLZ, MVT::v4i32, Legal); + } if (Subtarget->hasDQI()) { setOperationAction(ISD::MUL, MVT::v2i64, Legal); setOperationAction(ISD::MUL, MVT::v4i64, Legal); @@ -19051,6 +19057,7 @@ case X86ISD::FHADD: return "X86ISD::FHADD"; case X86ISD::FHSUB: return "X86ISD::FHSUB"; case X86ISD::ABS: return "X86ISD::ABS"; + case X86ISD::CONFLICT: return "X86ISD::CONFLICT"; case X86ISD::FMAX: return "X86ISD::FMAX"; case X86ISD::FMAX_RND: return "X86ISD::FMAX_RND"; case X86ISD::FMIN: return "X86ISD::FMIN"; Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -6079,120 +6079,6 @@ def v16i1sextv16i32 : PatLeaf<(v16i32 (X86vsrai VR512:$src, (i8 31)))>; def v8i1sextv8i64 : PatLeaf<(v8i64 (X86vsrai VR512:$src, (i8 63)))>; -multiclass avx512_conflict opc, string OpcodeStr, - RegisterClass RC, RegisterClass KRC, - X86MemOperand x86memop, - X86MemOperand x86scalar_mop, string BrdcstStr> { - let hasSideEffects = 0 in { - def rr : AVX5128I, EVEX; - let mayLoad = 1 in - def rm : AVX5128I, EVEX; - let mayLoad = 1 in - def rmb : AVX5128I, EVEX, EVEX_B; - def rrkz : AVX5128I, EVEX, EVEX_KZ; - let mayLoad = 1 in - def rmkz : AVX5128I, EVEX, EVEX_KZ; - let mayLoad = 1 in - def rmbkz : AVX5128I, EVEX, EVEX_KZ, EVEX_B; - - let Constraints = "$src1 = $dst" in { - def rrk : AVX5128I, EVEX, EVEX_K; - let mayLoad = 1 in - def rmk : AVX5128I, EVEX, EVEX_K; - let mayLoad = 1 in - def rmbk : AVX5128I, EVEX, EVEX_K, EVEX_B; - } - } -} - -let Predicates = [HasCDI] in { -defm VPCONFLICTD : avx512_conflict<0xC4, "vpconflictd", VR512, VK16WM, - i512mem, i32mem, "{1to16}">, - EVEX_V512, EVEX_CD8<32, CD8VF>; - - -defm VPCONFLICTQ : avx512_conflict<0xC4, "vpconflictq", VR512, VK8WM, - i512mem, i64mem, "{1to8}">, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -} - -def : Pat<(int_x86_avx512_mask_conflict_d_512 VR512:$src2, VR512:$src1, - GR16:$mask), - (VPCONFLICTDrrk VR512:$src1, - (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>; - -def : Pat<(int_x86_avx512_mask_conflict_q_512 VR512:$src2, VR512:$src1, - GR8:$mask), - (VPCONFLICTQrrk VR512:$src1, - (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; - -let Predicates = [HasCDI] in { -defm VPLZCNTD : avx512_conflict<0x44, "vplzcntd", VR512, VK16WM, - i512mem, i32mem, "{1to16}">, - EVEX_V512, EVEX_CD8<32, CD8VF>; - - -defm VPLZCNTQ : avx512_conflict<0x44, "vplzcntq", VR512, VK8WM, - i512mem, i64mem, "{1to8}">, - EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -} - -def : Pat<(int_x86_avx512_mask_lzcnt_d_512 VR512:$src2, VR512:$src1, - GR16:$mask), - (VPLZCNTDrrk VR512:$src1, - (v16i1 (COPY_TO_REGCLASS GR16:$mask, VK16WM)), VR512:$src2)>; - -def : Pat<(int_x86_avx512_mask_lzcnt_q_512 VR512:$src2, VR512:$src1, - GR8:$mask), - (VPLZCNTQrrk VR512:$src1, - (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src2)>; - -def : Pat<(v16i32 (ctlz (loadv16i32 addr:$src))), - (VPLZCNTDrm addr:$src)>; -def : Pat<(v16i32 (ctlz (v16i32 VR512:$src))), - (VPLZCNTDrr VR512:$src)>; -def : Pat<(v8i64 (ctlz (loadv8i64 addr:$src))), - (VPLZCNTQrm addr:$src)>; -def : Pat<(v8i64 (ctlz (v8i64 VR512:$src))), - (VPLZCNTQrr VR512:$src)>; - def : Pat<(store (i1 -1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; def : Pat<(store (i1 1), addr:$dst), (MOV8mi addr:$dst, (i8 1))>; def : Pat<(store (i1 0), addr:$dst), (MOV8mi addr:$dst, (i8 0))>; @@ -6753,6 +6639,9 @@ (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), (VPABSQZrr VR512:$src)>; +defm VPLZCNT : avx512_unary_rm_vl_dq<0x44, 0x44, "vplzcnt", ctlz, HasCDI>; +defm VPCONFLICT : avx512_unary_rm_vl_dq<0xC4, 0xC4, "vpconflict", X86Conflict, HasCDI>; + //===----------------------------------------------------------------------===// // AVX-512 - Unpack Instructions //===----------------------------------------------------------------------===// Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -258,7 +258,9 @@ def X86PAlignr : SDNode<"X86ISD::PALIGNR", SDTShuff3OpI>; def X86VAlign : SDNode<"X86ISD::VALIGN", SDTShuff3OpI>; -def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>; + +def X86Abs : SDNode<"X86ISD::ABS", SDTIntUnaryOp>; +def X86Conflict : SDNode<"X86ISD::CONFLICT", SDTIntUnaryOp>; def X86PShufd : SDNode<"X86ISD::PSHUFD", SDTShuff2OpI>; def X86PShufhw : SDNode<"X86ISD::PSHUFHW", SDTShuff2OpI>; Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -453,7 +453,18 @@ X86ISD::COMPRESS, 0), X86_INTRINSIC_DATA(avx512_mask_compress_q_512, COMPRESS_EXPAND_IN_REG, X86ISD::COMPRESS, 0), - + X86_INTRINSIC_DATA(avx512_mask_conflict_d_128, INTR_TYPE_1OP_MASK, + X86ISD::CONFLICT, 0), + X86_INTRINSIC_DATA(avx512_mask_conflict_d_256, INTR_TYPE_1OP_MASK, + X86ISD::CONFLICT, 0), + X86_INTRINSIC_DATA(avx512_mask_conflict_d_512, INTR_TYPE_1OP_MASK, + X86ISD::CONFLICT, 0), + X86_INTRINSIC_DATA(avx512_mask_conflict_q_128, INTR_TYPE_1OP_MASK, + X86ISD::CONFLICT, 0), + X86_INTRINSIC_DATA(avx512_mask_conflict_q_256, INTR_TYPE_1OP_MASK, + X86ISD::CONFLICT, 0), + X86_INTRINSIC_DATA(avx512_mask_conflict_q_512, INTR_TYPE_1OP_MASK, + X86ISD::CONFLICT, 0), X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_128, INTR_TYPE_1OP_MASK, X86ISD::CVTDQ2PD, 0), X86_INTRINSIC_DATA(avx512_mask_cvtdq2pd_256, INTR_TYPE_1OP_MASK, @@ -662,7 +673,19 @@ X86_INTRINSIC_DATA(avx512_mask_getexp_sd, INTR_TYPE_SCALAR_MASK_RM, X86ISD::FGETEXP_RND, 0), X86_INTRINSIC_DATA(avx512_mask_getexp_ss, INTR_TYPE_SCALAR_MASK_RM, - X86ISD::FGETEXP_RND, 0), + X86ISD::FGETEXP_RND, 0), + X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK, + ISD::CTLZ, 0), + X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK, + ISD::CTLZ, 0), + X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_512, INTR_TYPE_1OP_MASK, + ISD::CTLZ, 0), + X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_128, INTR_TYPE_1OP_MASK, + ISD::CTLZ, 0), + X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_256, INTR_TYPE_1OP_MASK, + ISD::CTLZ, 0), + X86_INTRINSIC_DATA(avx512_mask_lzcnt_q_512, INTR_TYPE_1OP_MASK, + ISD::CTLZ, 0), X86_INTRINSIC_DATA(avx512_mask_max_pd_128, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(avx512_mask_max_pd_256, INTR_TYPE_2OP_MASK, X86ISD::FMAX, 0), X86_INTRINSIC_DATA(avx512_mask_max_pd_512, INTR_TYPE_2OP_MASK, X86ISD::FMAX, @@ -1238,7 +1261,6 @@ X86_INTRINSIC_DATA(avx512_mask_vfnmsub_ps_512, FMA_OP_MASK, X86ISD::FNMSUB, X86ISD::FNMSUB_RND), - X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_128, VPERM_3OP_MASK, X86ISD::VPERMIV3, 0), X86_INTRINSIC_DATA(avx512_mask_vpermi2var_d_256, VPERM_3OP_MASK, Index: test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -100,7 +100,7 @@ define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) { ; CHECK-LABEL: test_sqrt_pd_512 ; CHECK: vsqrtpd - %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) + %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ret <8 x double> %res } declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone @@ -108,13 +108,13 @@ define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) { ; CHECK-LABEL: test_sqrt_ps_512 ; CHECK: vsqrtps - %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) + %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ret <16 x float> %res } define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) { ; CHECK-LABEL: test_sqrt_round_ps_512 ; CHECK: vsqrtps {rz-sae} - %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3) + %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 3) ret <16 x float> %res } declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone @@ -122,13 +122,13 @@ define <8 x double> @test_getexp_pd_512(<8 x double> %a0) { ; CHECK-LABEL: test_getexp_pd_512 ; CHECK: vgetexppd - %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) + %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 4) ret <8 x double> %res } define <8 x double> @test_getexp_round_pd_512(<8 x double> %a0) { ; CHECK-LABEL: test_getexp_round_pd_512 ; CHECK: vgetexppd {sae} - %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) + %res = call <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double> %a0, <8 x double> zeroinitializer, i8 -1, i32 8) ret <8 x double> %res } declare <8 x double> @llvm.x86.avx512.mask.getexp.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone @@ -136,14 +136,14 @@ define <16 x float> @test_getexp_ps_512(<16 x float> %a0) { ; CHECK-LABEL: test_getexp_ps_512 ; CHECK: vgetexpps - %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) + %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 4) ret <16 x float> %res } define <16 x float> @test_getexp_round_ps_512(<16 x float> %a0) { ; CHECK-LABEL: test_getexp_round_ps_512 ; CHECK: vgetexpps {sae} - %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) + %res = call <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 8) ret <16 x float> %res } declare <16 x float> @llvm.x86.avx512.mask.getexp.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone @@ -287,9 +287,10 @@ declare <8 x i64> @llvm.x86.avx512.pbroadcastq.i64.512(i64) nounwind readonly define <16 x i32> @test_conflict_d(<16 x i32> %a) { - ; CHECK: movw $-1, %ax - ; CHECK: vpxor - ; CHECK: vpconflictd +; CHECK-LABEL: test_conflict_d: +; CHECK: ## BB#0: +; CHECK-NEXT: vpconflictd %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0xc4,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1) ret <16 x i32> %res } @@ -297,9 +298,10 @@ declare <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly define <8 x i64> @test_conflict_q(<8 x i64> %a) { - ; CHECK: movb $-1, %al - ; CHECK: vpxor - ; CHECK: vpconflictq +; CHECK-LABEL: test_conflict_q: +; CHECK: ## BB#0: +; CHECK-NEXT: vpconflictq %zmm0, %zmm0 +; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) ret <8 x i64> %res } @@ -307,21 +309,32 @@ declare <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64>, <8 x i64>, i8) nounwind readonly define <16 x i32> @test_maskz_conflict_d(<16 x i32> %a, i16 %mask) { - ; CHECK: vpconflictd +; CHECK-LABEL: test_maskz_conflict_d: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpconflictd %zmm0, %zmm0 {%k1} {z} +; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.conflict.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 %mask) ret <16 x i32> %res } define <8 x i64> @test_mask_conflict_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { - ; CHECK: vpconflictq +; CHECK-LABEL: test_mask_conflict_q: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpconflictq %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.mask.conflict.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) ret <8 x i64> %res } define <16 x i32> @test_lzcnt_d(<16 x i32> %a) { - ; CHECK: movw $-1, %ax - ; CHECK: vpxor - ; CHECK: vplzcntd +; CHECK-LABEL: test_lzcnt_d: +; CHECK: ## BB#0: +; CHECK-NEXT: vplzcntd %zmm0, %zmm0 +; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> zeroinitializer, i16 -1) ret <16 x i32> %res } @@ -329,9 +342,10 @@ declare <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32>, <16 x i32>, i16) nounwind readonly define <8 x i64> @test_lzcnt_q(<8 x i64> %a) { - ; CHECK: movb $-1, %al - ; CHECK: vpxor - ; CHECK: vplzcntq +; CHECK-LABEL: test_lzcnt_q: +; CHECK: ## BB#0: +; CHECK-NEXT: vplzcntq %zmm0, %zmm0 +; CHECK-NEXT: retq %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> zeroinitializer, i8 -1) ret <8 x i64> %res } @@ -340,13 +354,24 @@ define <16 x i32> @test_mask_lzcnt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) { - ; CHECK: vplzcntd +; CHECK-LABEL: test_mask_lzcnt_d: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vplzcntd %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.lzcnt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask) ret <16 x i32> %res } define <8 x i64> @test_mask_lzcnt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) { - ; CHECK: vplzcntq +; CHECK-LABEL: test_mask_lzcnt_q: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vplzcntq %zmm0, %zmm1 {%k1} +; CHECK-NEXT: vmovaps %zmm1, %zmm0 +; CHECK-NEXT: retq ## encoding: [0xc3] %res = call <8 x i64> @llvm.x86.avx512.mask.lzcnt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask) ret <8 x i64> %res } @@ -442,9 +467,9 @@ declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16) ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_d_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vpabsd{{.*}}{%k1} +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsd{{.*}}{%k1} define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) { %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1) @@ -455,9 +480,9 @@ declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8) ; CHECK-LABEL: @test_int_x86_avx512_mask_pabs_q_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vpabsq{{.*}}{%k1} +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpabsq{{.*}}{%k1} define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) { %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1) @@ -2205,26 +2230,26 @@ define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_add_round_ps_rn_sae - ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0) ret <16 x float> %res } define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_add_round_ps_rd_sae - ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1) ret <16 x float> %res } define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_add_round_ps_ru_sae - ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2) ret <16 x float> %res } define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_add_round_ps_rz_sae - ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3) ret <16 x float> %res } @@ -2232,7 +2257,7 @@ define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_add_round_ps_current - ;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vaddps %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) ret <16 x float> %res } @@ -2274,26 +2299,26 @@ define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rn_sae - ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0) ret <16 x float> %res } define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rd_sae - ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1) ret <16 x float> %res } define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_sub_round_ps_ru_sae - ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2) ret <16 x float> %res } define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_sub_round_ps_rz_sae - ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3) ret <16 x float> %res } @@ -2301,7 +2326,7 @@ define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_sub_round_ps_current - ;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vsubps %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) ret <16 x float> %res } @@ -2375,26 +2400,26 @@ define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_div_round_ps_rn_sae - ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0) ret <16 x float> %res } define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_div_round_ps_rd_sae - ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1) ret <16 x float> %res } define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_div_round_ps_ru_sae - ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2) ret <16 x float> %res } define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_div_round_ps_rz_sae - ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3) ret <16 x float> %res } @@ -2402,7 +2427,7 @@ define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_div_round_ps_current - ;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vdivps %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) ret <16 x float> %res } @@ -2465,7 +2490,7 @@ define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_min_round_ps_current - ;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vminps %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) ret <16 x float> %res } @@ -2508,14 +2533,14 @@ define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) { ;CHECK-LABEL: test_mm512_mask_max_round_ps_current - ;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1} + ;CHECK: vmaxps %zmm1, %zmm0, %zmm2 {%k1} %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4) ret <16 x float> %res } define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) { ;CHECK-LABEL: test_mm512_max_round_ps_sae - ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 + ;CHECK: vmaxps {sae}, %zmm1, %zmm0, %zmm0 %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8) ret <16 x float> %res } @@ -2720,8 +2745,8 @@ define <2 x double> @test_x86_avx512_cvtsi2sd32(<2 x double> %a, i32 %b) { ; CHECK-LABEL: test_x86_avx512_cvtsi2sd32: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vcvtsi2sdl %edi, {rz-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd32(<2 x double> %a, i32 %b, i32 3) ; <<<2 x double>> [#uses=1] ret <2 x double> %res } @@ -2730,8 +2755,8 @@ define <2 x double> @test_x86_avx512_cvtsi2sd64(<2 x double> %a, i64 %b) { ; CHECK-LABEL: test_x86_avx512_cvtsi2sd64: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vcvtsi2sdq %rdi, {rz-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq %res = call <2 x double> @llvm.x86.avx512.cvtsi2sd64(<2 x double> %a, i64 %b, i32 3) ; <<<2 x double>> [#uses=1] ret <2 x double> %res } @@ -2740,8 +2765,8 @@ define <4 x float> @test_x86_avx512_cvtsi2ss32(<4 x float> %a, i32 %b) { ; CHECK-LABEL: test_x86_avx512_cvtsi2ss32: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vcvtsi2ssl %edi, {rz-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss32(<4 x float> %a, i32 %b, i32 3) ; <<<4 x float>> [#uses=1] ret <4 x float> %res } @@ -2750,8 +2775,8 @@ define <4 x float> @test_x86_avx512_cvtsi2ss64(<4 x float> %a, i64 %b) { ; CHECK-LABEL: test_x86_avx512_cvtsi2ss64: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vcvtsi2ssq %rdi, {rz-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq %res = call <4 x float> @llvm.x86.avx512.cvtsi2ss64(<4 x float> %a, i64 %b, i32 3) ; <<<4 x float>> [#uses=1] ret <4 x float> %res } @@ -2760,8 +2785,8 @@ define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss (<4 x float> %a, i32 %b) ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vcvtusi2ssl %edi, {rd-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq { %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1] ret <4 x float> %res @@ -2770,9 +2795,9 @@ define <4 x float> @test_x86_avx512__mm_cvt_roundu32_ss_mem(<4 x float> %a, i32* %ptr) ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu32_ss_mem: ; CHECK: ## BB#0: -; CHECK-NEXT: movl (%rdi), %eax -; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: movl (%rdi), %eax +; CHECK-NEXT: vcvtusi2ssl %eax, {rd-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq { %b = load i32, i32* %ptr %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 1) ; <<<4 x float>> [#uses=1] @@ -2782,8 +2807,8 @@ define <4 x float> @test_x86_avx512__mm_cvtu32_ss(<4 x float> %a, i32 %b) ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0 +; CHECK-NEXT: retq { %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] ret <4 x float> %res @@ -2793,7 +2818,7 @@ ; CHECK-LABEL: test_x86_avx512__mm_cvtu32_ss_mem: ; CHECK: ## BB#0: ; CHECK-NEXT: vcvtusi2ssl (%rdi), %xmm0, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: retq { %b = load i32, i32* %ptr %res = call <4 x float> @llvm.x86.avx512.cvtusi2ss(<4 x float> %a, i32 %b, i32 4) ; <<<4 x float>> [#uses=1] @@ -2804,8 +2829,8 @@ define <4 x float> @_mm_cvt_roundu64_ss (<4 x float> %a, i64 %b) ; CHECK-LABEL: _mm_cvt_roundu64_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vcvtusi2ssq %rdi, {rd-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq { %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 1) ; <<<4 x float>> [#uses=1] ret <4 x float> %res @@ -2814,8 +2839,8 @@ define <4 x float> @_mm_cvtu64_ss(<4 x float> %a, i64 %b) ; CHECK-LABEL: _mm_cvtu64_ss: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0 +; CHECK-NEXT: retq { %res = call <4 x float> @llvm.x86.avx512.cvtusi642ss(<4 x float> %a, i64 %b, i32 4) ; <<<4 x float>> [#uses=1] ret <4 x float> %res @@ -2825,8 +2850,8 @@ define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b) ; CHECK-LABEL: test_x86_avx512_mm_cvtu32_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0 +; CHECK-NEXT: retq { %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<<2 x double>> [#uses=1] ret <2 x double> %res @@ -2836,8 +2861,8 @@ define <2 x double> @test_x86_avx512_mm_cvtu64_sd(<2 x double> %a, i64 %b) ; CHECK-LABEL: test_x86_avx512_mm_cvtu64_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vcvtusi2sdq %rdi, {rd-sae}, %xmm0, %xmm0 +; CHECK-NEXT: retq { %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 1) ; <<<2 x double>> [#uses=1] ret <2 x double> %res @@ -2846,8 +2871,8 @@ define <2 x double> @test_x86_avx512__mm_cvt_roundu64_sd(<2 x double> %a, i64 %b) ; CHECK-LABEL: test_x86_avx512__mm_cvt_roundu64_sd: ; CHECK: ## BB#0: -; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0 -; CHECK-NEXT: retq +; CHECK-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0 +; CHECK-NEXT: retq { %res = call <2 x double> @llvm.x86.avx512.cvtusi642sd(<2 x double> %a, i64 %b, i32 4) ; <<<2 x double>> [#uses=1] ret <2 x double> %res @@ -2879,9 +2904,9 @@ declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_d_512 -; CHECK-NOT: call +; CHECK-NOT: call ; CHECK: vpmaxsd %zmm -; CHECK: {%k1} +; CHECK: {%k1} define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) @@ -2890,9 +2915,9 @@ } ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxs_q_512 -; CHECK-NOT: call +; CHECK-NOT: call ; CHECK: vpmaxsq %zmm -; CHECK: {%k1} +; CHECK: {%k1} define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) @@ -2903,9 +2928,9 @@ declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_d_512 -; CHECK-NOT: call +; CHECK-NOT: call ; CHECK: vpmaxud %zmm -; CHECK: {%k1} +; CHECK: {%k1} define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) @@ -2916,9 +2941,9 @@ declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) ; CHECK-LABEL: @test_int_x86_avx512_mask_pmaxu_q_512 -; CHECK-NOT: call +; CHECK-NOT: call ; CHECK: vpmaxuq %zmm -; CHECK: {%k1} +; CHECK: {%k1} define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) @@ -2929,9 +2954,9 @@ declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_d_512 -; CHECK-NOT: call +; CHECK-NOT: call ; CHECK: vpminsd %zmm -; CHECK: {%k1} +; CHECK: {%k1} define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) @@ -2942,9 +2967,9 @@ declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) ; CHECK-LABEL: @test_int_x86_avx512_mask_pmins_q_512 -; CHECK-NOT: call +; CHECK-NOT: call ; CHECK: vpminsq %zmm -; CHECK: {%k1} +; CHECK: {%k1} define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) @@ -2953,9 +2978,9 @@ } ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_d_512 -; CHECK-NOT: call +; CHECK-NOT: call ; CHECK: vpminud %zmm -; CHECK: {%k1} +; CHECK: {%k1} define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) @@ -2966,9 +2991,9 @@ declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) ; CHECK-LABEL: @test_int_x86_avx512_mask_pminu_q_512 -; CHECK-NOT: call +; CHECK-NOT: call ; CHECK: vpminuq %zmm -; CHECK: {%k1} +; CHECK: {%k1} define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) @@ -2979,9 +3004,9 @@ declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_d_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vpermi2d {{.*}}{%k1} +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpermi2d {{.*}}{%k1} define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) @@ -2992,9 +3017,9 @@ declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8) ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_pd_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vpermi2pd {{.*}}{%k1} +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpermi2pd {{.*}}{%k1} define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) { %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1) @@ -3005,9 +3030,9 @@ declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16) ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_ps_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vpermi2ps {{.*}}{%k1} +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpermi2ps {{.*}}{%k1} define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) { %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1) @@ -3018,9 +3043,9 @@ declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermi2var_q_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vpermi2q {{.*}}{%k1} +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vpermi2q {{.*}}{%k1} define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) @@ -3031,8 +3056,8 @@ declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_d_512 -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vpermt2d {{.*}}{%k1} {z} define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) @@ -3044,8 +3069,8 @@ declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8) ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_pd_512 -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vpermt2pd {{.*}}{%k1} {z} define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) @@ -3057,8 +3082,8 @@ declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16) ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_ps_512 -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vpermt2ps {{.*}}{%k1} {z} define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) @@ -3071,8 +3096,8 @@ declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) ; CHECK-LABEL: @test_int_x86_avx512_maskz_vpermt2var_q_512 -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vpermt2q {{.*}}{%k1} {z} define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) @@ -3084,8 +3109,8 @@ declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) ; CHECK-LABEL: @test_int_x86_avx512_mask_vpermt2var_d_512 -; CHECK-NOT: call -; CHECK: kmov +; CHECK-NOT: call +; CHECK: kmov ; CHECK: vpermt2d {{.*}}{%k1} ; CHECK-NOT: {z} define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { @@ -3097,9 +3122,9 @@ declare <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32) ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_pd_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vscalefpd{{.*}}{%k1} +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vscalefpd{{.*}}{%k1} define <8 x double>@test_int_x86_avx512_mask_scalef_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { %res = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3, i32 3) %res1 = call <8 x double> @llvm.x86.avx512.mask.scalef.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1, i32 0) @@ -3109,9 +3134,9 @@ declare <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32) ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ps_512 -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vscalefps{{.*}}{%k1} +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vscalefps{{.*}}{%k1} define <16 x float>@test_int_x86_avx512_mask_scalef_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { %res = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3, i32 2) %res1 = call <16 x float> @llvm.x86.avx512.mask.scalef.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1, i32 0) @@ -3186,7 +3211,7 @@ ; CHECK-NEXT: kmovw %eax, %k1 ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1} ; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 {%k1} {z} -; CHECK-NEXT: vpunpcklqdq {{.*#+}} +; CHECK-NEXT: vpunpcklqdq {{.*#+}} ; CHECK: vpaddq %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 ; CHECK-NEXT: retq @@ -3238,7 +3263,7 @@ ; CHECK: ## BB#0: ; CHECK-NEXT: kmovw %edi, %k1 ; CHECK-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1} -; CHECK-NEXT: vpunpckldq {{.*#+}} +; CHECK-NEXT: vpunpckldq {{.*#+}} ; CHECK: vpaddd %zmm0, %zmm2, %zmm0 ; CHECK-NEXT: retq %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) @@ -3873,9 +3898,9 @@ declare <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float>, <4 x float>,<4 x float>, i8, i32) ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_ss -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vscalefss {{.*}}{%k1} +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vscalefss {{.*}}{%k1} ; CHECK: vscalefss {rn-sae} define <4 x float>@test_int_x86_avx512_mask_scalef_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) { %res = call <4 x float> @llvm.x86.avx512.mask.scalef.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4, i32 4) @@ -3886,9 +3911,9 @@ declare <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double>, <2 x double>,<2 x double>, i8, i32) ; CHECK-LABEL: @test_int_x86_avx512_mask_scalef_sd -; CHECK-NOT: call -; CHECK: kmov -; CHECK: vscalefsd {{.*}}{%k1} +; CHECK-NOT: call +; CHECK: kmov +; CHECK: vscalefsd {{.*}}{%k1} ; CHECK: vscalefsd {rn-sae} define <2 x double>@test_int_x86_avx512_mask_scalef_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) { %res = call <2 x double> @llvm.x86.avx512.mask.scalef.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4, i32 4) Index: test/CodeGen/X86/avx512cdvl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512cdvl-intrinsics.ll +++ test/CodeGen/X86/avx512cdvl-intrinsics.ll @@ -0,0 +1,216 @@ +; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl| FileCheck %s + +define <8 x i32> @test_ctlz_d_256(<8 x i32> %a) { +; CHECK-LABEL: test_ctlz_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vplzcntd %ymm0, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x i32> @llvm.ctlz.v8i32(<8 x i32> %a, i1 false) + ret <8 x i32> %res +} + +declare <8 x i32> @llvm.ctlz.v8i32(<8 x i32>, i1) nounwind readonly + +define <4 x i32> @test_ctlz_d_128(<4 x i32> %a) { +; CHECK-LABEL: test_ctlz_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vplzcntd %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x i32> @llvm.ctlz.v4i32(<4 x i32> %a, i1 false) + ret <4 x i32> %res +} + +declare <4 x i32> @llvm.ctlz.v4i32(<4 x i32>, i1) nounwind readonly + +define <4 x i64> @test_ctlz_q_256(<4 x i64> %a) { +; CHECK-LABEL: test_ctlz_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: vplzcntq %ymm0, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x i64> @llvm.ctlz.v4i64(<4 x i64> %a, i1 false) + ret <4 x i64> %res +} + +declare <4 x i64> @llvm.ctlz.v4i64(<4 x i64>, i1) nounwind readonly + +define <2 x i64> @test_ctlz_q_128(<2 x i64> %a) { +; CHECK-LABEL: test_ctlz_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: vplzcntq %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x i64> @llvm.ctlz.v2i64(<2 x i64> %a, i1 false) + ret <2 x i64> %res +} + +declare <2 x i64> @llvm.ctlz.v2i64(<2 x i64>, i1) nounwind readonly + +declare <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_vplzcnt_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vplzcntd %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vplzcntd %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vplzcntd %xmm0, %xmm0 +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) + %res3 = call <4 x i32> @llvm.x86.avx512.mask.lzcnt.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2) + %res2 = add <4 x i32> %res, %res1 + %res4 = add <4 x i32> %res2, %res3 + ret <4 x i32> %res4 +} + +declare <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_vplzcnt_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vplzcntd %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vplzcntd %ymm0, %ymm0 +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.lzcnt.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1) + %res2 = add <8 x i32> %res, %res1 + ret <8 x i32> %res2 +} + +declare <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64>, <2 x i64>, i8) + +define <2 x i64>@test_int_x86_avx512_mask_vplzcnt_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vplzcntq %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vplzcntq %xmm0, %xmm0 +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) + %res1 = call <2 x i64> @llvm.x86.avx512.mask.lzcnt.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1) + %res2 = add <2 x i64> %res, %res1 + ret <2 x i64> %res2 +} + +declare <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64>, <4 x i64>, i8) + +define <4 x i64>@test_int_x86_avx512_mask_vplzcnt_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_vplzcnt_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vplzcntq %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vplzcntq %ymm0, %ymm0 +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) + %res1 = call <4 x i64> @llvm.x86.avx512.mask.lzcnt.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1) + %res2 = add <4 x i64> %res, %res1 + ret <4 x i64> %res2 +} + +declare <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_vpconflict_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpconflictd %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpconflictd %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpconflictd %xmm0, %xmm0 +; CHECK-NEXT: vpaddd %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: vpaddd %xmm2, %xmm0, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) + %res3 = call <4 x i32> @llvm.x86.avx512.mask.conflict.d.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %x2) + %res2 = add <4 x i32> %res, %res1 + %res4 = add <4 x i32> %res2, %res3 + ret <4 x i32> %res4 +} + +declare <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_vpconflict_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpconflictd %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vpconflictd %ymm0, %ymm0 +; CHECK-NEXT: vpaddd %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.conflict.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1) + %res2 = add <8 x i32> %res, %res1 + ret <8 x i32> %res2 +} + +declare <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64>, <2 x i64>, i8) + +define <2 x i64>@test_int_x86_avx512_mask_vpconflict_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpconflictq %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpconflictq %xmm0, %xmm0 +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) + %res1 = call <2 x i64> @llvm.x86.avx512.mask.conflict.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1) + %res2 = add <2 x i64> %res, %res1 + ret <2 x i64> %res2 +} + +declare <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64>, <4 x i64>, i8) + +define <4 x i64>@test_int_x86_avx512_mask_vpconflict_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) { +; CHECK-LABEL: test_int_x86_avx512_mask_vpconflict_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpconflictq %ymm0, %ymm1 {%k1} +; CHECK-NEXT: vpconflictq %ymm0, %ymm0 +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) + %res1 = call <4 x i64> @llvm.x86.avx512.mask.conflict.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1) + %res2 = add <4 x i64> %res, %res1 + ret <4 x i64> %res2 +} + + + + + + + + + + + + + + + + + + + + + + + + + + + Index: test/MC/X86/x86-64-avx512cd.s =================================================================== --- test/MC/X86/x86-64-avx512cd.s +++ test/MC/X86/x86-64-avx512cd.s @@ -0,0 +1,450 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512cd --show-encoding %s | FileCheck %s + +// CHECK: vplzcntq %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x44,0xee] + vplzcntq %zmm22, %zmm21 + +// CHECK: vplzcntq %zmm22, %zmm21 {%k7} +// CHECK: encoding: [0x62,0xa2,0xfd,0x4f,0x44,0xee] + vplzcntq %zmm22, %zmm21 {%k7} + +// CHECK: vplzcntq %zmm22, %zmm21 {%k7} {z} +// CHECK: encoding: [0x62,0xa2,0xfd,0xcf,0x44,0xee] + vplzcntq %zmm22, %zmm21 {%k7} {z} + +// CHECK: vplzcntq (%rcx), %zmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x29] + vplzcntq (%rcx), %zmm21 + +// CHECK: vplzcntq 291(%rax,%r14,8), %zmm21 +// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x44,0xac,0xf0,0x23,0x01,0x00,0x00] + vplzcntq 291(%rax,%r14,8), %zmm21 + +// CHECK: vplzcntq (%rcx){1to8}, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x29] + vplzcntq (%rcx){1to8}, %zmm21 + +// CHECK: vplzcntq 4064(%rdx), %zmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0xaa,0xe0,0x0f,0x00,0x00] + vplzcntq 4064(%rdx), %zmm21 + +// CHECK: vplzcntq 4096(%rdx), %zmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x6a,0x40] + vplzcntq 4096(%rdx), %zmm21 + +// CHECK: vplzcntq -4096(%rdx), %zmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x6a,0xc0] + vplzcntq -4096(%rdx), %zmm21 + +// CHECK: vplzcntq -4128(%rdx), %zmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0xaa,0xe0,0xef,0xff,0xff] + vplzcntq -4128(%rdx), %zmm21 + +// CHECK: vplzcntq 1016(%rdx){1to8}, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x6a,0x7f] + vplzcntq 1016(%rdx){1to8}, %zmm21 + +// CHECK: vplzcntq 1024(%rdx){1to8}, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0xaa,0x00,0x04,0x00,0x00] + vplzcntq 1024(%rdx){1to8}, %zmm21 + +// CHECK: vplzcntq -1024(%rdx){1to8}, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x6a,0x80] + vplzcntq -1024(%rdx){1to8}, %zmm21 + +// CHECK: vplzcntq -1032(%rdx){1to8}, %zmm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0xaa,0xf8,0xfb,0xff,0xff] + vplzcntq -1032(%rdx){1to8}, %zmm21 + +// CHECK: vplzcntq %zmm27, %zmm23 +// CHECK: encoding: [0x62,0x82,0xfd,0x48,0x44,0xfb] + vplzcntq %zmm27, %zmm23 + +// CHECK: vplzcntq %zmm27, %zmm23 {%k5} +// CHECK: encoding: [0x62,0x82,0xfd,0x4d,0x44,0xfb] + vplzcntq %zmm27, %zmm23 {%k5} + +// CHECK: vplzcntq %zmm27, %zmm23 {%k5} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0xcd,0x44,0xfb] + vplzcntq %zmm27, %zmm23 {%k5} {z} + +// CHECK: vplzcntq (%rcx), %zmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x39] + vplzcntq (%rcx), %zmm23 + +// CHECK: vplzcntq 4660(%rax,%r14,8), %zmm23 +// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0x44,0xbc,0xf0,0x34,0x12,0x00,0x00] + vplzcntq 4660(%rax,%r14,8), %zmm23 + +// CHECK: vplzcntq (%rcx){1to8}, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x39] + vplzcntq (%rcx){1to8}, %zmm23 + +// CHECK: vplzcntq 4064(%rdx), %zmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0xba,0xe0,0x0f,0x00,0x00] + vplzcntq 4064(%rdx), %zmm23 + +// CHECK: vplzcntq 4096(%rdx), %zmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x7a,0x40] + vplzcntq 4096(%rdx), %zmm23 + +// CHECK: vplzcntq -4096(%rdx), %zmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0x7a,0xc0] + vplzcntq -4096(%rdx), %zmm23 + +// CHECK: vplzcntq -4128(%rdx), %zmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0x44,0xba,0xe0,0xef,0xff,0xff] + vplzcntq -4128(%rdx), %zmm23 + +// CHECK: vplzcntq 1016(%rdx){1to8}, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x7a,0x7f] + vplzcntq 1016(%rdx){1to8}, %zmm23 + +// CHECK: vplzcntq 1024(%rdx){1to8}, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0xba,0x00,0x04,0x00,0x00] + vplzcntq 1024(%rdx){1to8}, %zmm23 + +// CHECK: vplzcntq -1024(%rdx){1to8}, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0x7a,0x80] + vplzcntq -1024(%rdx){1to8}, %zmm23 + +// CHECK: vplzcntq -1032(%rdx){1to8}, %zmm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0x44,0xba,0xf8,0xfb,0xff,0xff] + vplzcntq -1032(%rdx){1to8}, %zmm23 + +// CHECK: vplzcntd %zmm22, %zmm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x44,0xce] + vplzcntd %zmm22, %zmm25 + +// CHECK: vplzcntd %zmm22, %zmm25 {%k2} +// CHECK: encoding: [0x62,0x22,0x7d,0x4a,0x44,0xce] + vplzcntd %zmm22, %zmm25 {%k2} + +// CHECK: vplzcntd %zmm22, %zmm25 {%k2} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xca,0x44,0xce] + vplzcntd %zmm22, %zmm25 {%k2} {z} + +// CHECK: vplzcntd (%rcx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x09] + vplzcntd (%rcx), %zmm25 + +// CHECK: vplzcntd 291(%rax,%r14,8), %zmm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x44,0x8c,0xf0,0x23,0x01,0x00,0x00] + vplzcntd 291(%rax,%r14,8), %zmm25 + +// CHECK: vplzcntd (%rcx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x09] + vplzcntd (%rcx){1to16}, %zmm25 + +// CHECK: vplzcntd 4064(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x8a,0xe0,0x0f,0x00,0x00] + vplzcntd 4064(%rdx), %zmm25 + +// CHECK: vplzcntd 4096(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x4a,0x40] + vplzcntd 4096(%rdx), %zmm25 + +// CHECK: vplzcntd -4096(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x4a,0xc0] + vplzcntd -4096(%rdx), %zmm25 + +// CHECK: vplzcntd -4128(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x44,0x8a,0xe0,0xef,0xff,0xff] + vplzcntd -4128(%rdx), %zmm25 + +// CHECK: vplzcntd 508(%rdx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x4a,0x7f] + vplzcntd 508(%rdx){1to16}, %zmm25 + +// CHECK: vplzcntd 512(%rdx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x8a,0x00,0x02,0x00,0x00] + vplzcntd 512(%rdx){1to16}, %zmm25 + +// CHECK: vplzcntd -512(%rdx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x4a,0x80] + vplzcntd -512(%rdx){1to16}, %zmm25 + +// CHECK: vplzcntd -516(%rdx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0x44,0x8a,0xfc,0xfd,0xff,0xff] + vplzcntd -516(%rdx){1to16}, %zmm25 + +// CHECK: vplzcntd %zmm22, %zmm21 +// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x44,0xee] + vplzcntd %zmm22, %zmm21 + +// CHECK: vplzcntd %zmm22, %zmm21 {%k3} +// CHECK: encoding: [0x62,0xa2,0x7d,0x4b,0x44,0xee] + vplzcntd %zmm22, %zmm21 {%k3} + +// CHECK: vplzcntd %zmm22, %zmm21 {%k3} {z} +// CHECK: encoding: [0x62,0xa2,0x7d,0xcb,0x44,0xee] + vplzcntd %zmm22, %zmm21 {%k3} {z} + +// CHECK: vplzcntd (%rcx), %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0x29] + vplzcntd (%rcx), %zmm21 + +// CHECK: vplzcntd 4660(%rax,%r14,8), %zmm21 +// CHECK: encoding: [0x62,0xa2,0x7d,0x48,0x44,0xac,0xf0,0x34,0x12,0x00,0x00] + vplzcntd 4660(%rax,%r14,8), %zmm21 + +// CHECK: vplzcntd (%rcx){1to16}, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0x29] + vplzcntd (%rcx){1to16}, %zmm21 + +// CHECK: vplzcntd 4064(%rdx), %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0xaa,0xe0,0x0f,0x00,0x00] + vplzcntd 4064(%rdx), %zmm21 + +// CHECK: vplzcntd 4096(%rdx), %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0x6a,0x40] + vplzcntd 4096(%rdx), %zmm21 + +// CHECK: vplzcntd -4096(%rdx), %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0x6a,0xc0] + vplzcntd -4096(%rdx), %zmm21 + +// CHECK: vplzcntd -4128(%rdx), %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x44,0xaa,0xe0,0xef,0xff,0xff] + vplzcntd -4128(%rdx), %zmm21 + +// CHECK: vplzcntd 508(%rdx){1to16}, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0x6a,0x7f] + vplzcntd 508(%rdx){1to16}, %zmm21 + +// CHECK: vplzcntd 512(%rdx){1to16}, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0xaa,0x00,0x02,0x00,0x00] + vplzcntd 512(%rdx){1to16}, %zmm21 + +// CHECK: vplzcntd -512(%rdx){1to16}, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0x6a,0x80] + vplzcntd -512(%rdx){1to16}, %zmm21 + +// CHECK: vplzcntd -516(%rdx){1to16}, %zmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x58,0x44,0xaa,0xfc,0xfd,0xff,0xff] + vplzcntd -516(%rdx){1to16}, %zmm21 + +// CHECK: vpconflictq %zmm25, %zmm20 +// CHECK: encoding: [0x62,0x82,0xfd,0x48,0xc4,0xe1] + vpconflictq %zmm25, %zmm20 + +// CHECK: vpconflictq %zmm25, %zmm20 {%k6} +// CHECK: encoding: [0x62,0x82,0xfd,0x4e,0xc4,0xe1] + vpconflictq %zmm25, %zmm20 {%k6} + +// CHECK: vpconflictq %zmm25, %zmm20 {%k6} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0xce,0xc4,0xe1] + vpconflictq %zmm25, %zmm20 {%k6} {z} + +// CHECK: vpconflictq (%rcx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x21] + vpconflictq (%rcx), %zmm20 + +// CHECK: vpconflictq 291(%rax,%r14,8), %zmm20 +// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0xc4,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpconflictq 291(%rax,%r14,8), %zmm20 + +// CHECK: vpconflictq (%rcx){1to8}, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x21] + vpconflictq (%rcx){1to8}, %zmm20 + +// CHECK: vpconflictq 4064(%rdx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0xa2,0xe0,0x0f,0x00,0x00] + vpconflictq 4064(%rdx), %zmm20 + +// CHECK: vpconflictq 4096(%rdx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x62,0x40] + vpconflictq 4096(%rdx), %zmm20 + +// CHECK: vpconflictq -4096(%rdx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x62,0xc0] + vpconflictq -4096(%rdx), %zmm20 + +// CHECK: vpconflictq -4128(%rdx), %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0xa2,0xe0,0xef,0xff,0xff] + vpconflictq -4128(%rdx), %zmm20 + +// CHECK: vpconflictq 1016(%rdx){1to8}, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x62,0x7f] + vpconflictq 1016(%rdx){1to8}, %zmm20 + +// CHECK: vpconflictq 1024(%rdx){1to8}, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0xa2,0x00,0x04,0x00,0x00] + vpconflictq 1024(%rdx){1to8}, %zmm20 + +// CHECK: vpconflictq -1024(%rdx){1to8}, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x62,0x80] + vpconflictq -1024(%rdx){1to8}, %zmm20 + +// CHECK: vpconflictq -1032(%rdx){1to8}, %zmm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0xa2,0xf8,0xfb,0xff,0xff] + vpconflictq -1032(%rdx){1to8}, %zmm20 + +// CHECK: vpconflictq %zmm21, %zmm17 +// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0xc4,0xcd] + vpconflictq %zmm21, %zmm17 + +// CHECK: vpconflictq %zmm21, %zmm17 {%k6} +// CHECK: encoding: [0x62,0xa2,0xfd,0x4e,0xc4,0xcd] + vpconflictq %zmm21, %zmm17 {%k6} + +// CHECK: vpconflictq %zmm21, %zmm17 {%k6} {z} +// CHECK: encoding: [0x62,0xa2,0xfd,0xce,0xc4,0xcd] + vpconflictq %zmm21, %zmm17 {%k6} {z} + +// CHECK: vpconflictq (%rcx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x09] + vpconflictq (%rcx), %zmm17 + +// CHECK: vpconflictq 4660(%rax,%r14,8), %zmm17 +// CHECK: encoding: [0x62,0xa2,0xfd,0x48,0xc4,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpconflictq 4660(%rax,%r14,8), %zmm17 + +// CHECK: vpconflictq (%rcx){1to8}, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x09] + vpconflictq (%rcx){1to8}, %zmm17 + +// CHECK: vpconflictq 4064(%rdx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x8a,0xe0,0x0f,0x00,0x00] + vpconflictq 4064(%rdx), %zmm17 + +// CHECK: vpconflictq 4096(%rdx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x4a,0x40] + vpconflictq 4096(%rdx), %zmm17 + +// CHECK: vpconflictq -4096(%rdx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x4a,0xc0] + vpconflictq -4096(%rdx), %zmm17 + +// CHECK: vpconflictq -4128(%rdx), %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x48,0xc4,0x8a,0xe0,0xef,0xff,0xff] + vpconflictq -4128(%rdx), %zmm17 + +// CHECK: vpconflictq 1016(%rdx){1to8}, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x4a,0x7f] + vpconflictq 1016(%rdx){1to8}, %zmm17 + +// CHECK: vpconflictq 1024(%rdx){1to8}, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x8a,0x00,0x04,0x00,0x00] + vpconflictq 1024(%rdx){1to8}, %zmm17 + +// CHECK: vpconflictq -1024(%rdx){1to8}, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x4a,0x80] + vpconflictq -1024(%rdx){1to8}, %zmm17 + +// CHECK: vpconflictq -1032(%rdx){1to8}, %zmm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x58,0xc4,0x8a,0xf8,0xfb,0xff,0xff] + vpconflictq -1032(%rdx){1to8}, %zmm17 + +// CHECK: vpconflictd %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0xc4,0xcb] + vpconflictd %zmm19, %zmm25 + +// CHECK: vpconflictd %zmm19, %zmm25 {%k4} +// CHECK: encoding: [0x62,0x22,0x7d,0x4c,0xc4,0xcb] + vpconflictd %zmm19, %zmm25 {%k4} + +// CHECK: vpconflictd %zmm19, %zmm25 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xcc,0xc4,0xcb] + vpconflictd %zmm19, %zmm25 {%k4} {z} + +// CHECK: vpconflictd (%rcx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x09] + vpconflictd (%rcx), %zmm25 + +// CHECK: vpconflictd 291(%rax,%r14,8), %zmm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0xc4,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpconflictd 291(%rax,%r14,8), %zmm25 + +// CHECK: vpconflictd (%rcx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x09] + vpconflictd (%rcx){1to16}, %zmm25 +// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512cd --show-encoding %s | FileCheck %s + +// CHECK: vpconflictd 4064(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x8a,0xe0,0x0f,0x00,0x00] + vpconflictd 4064(%rdx), %zmm25 + +// CHECK: vpconflictd 4096(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x4a,0x40] + vpconflictd 4096(%rdx), %zmm25 + +// CHECK: vpconflictd -4096(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x4a,0xc0] + vpconflictd -4096(%rdx), %zmm25 + +// CHECK: vpconflictd -4128(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x8a,0xe0,0xef,0xff,0xff] + vpconflictd -4128(%rdx), %zmm25 + +// CHECK: vpconflictd 508(%rdx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x4a,0x7f] + vpconflictd 508(%rdx){1to16}, %zmm25 + +// CHECK: vpconflictd 512(%rdx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x8a,0x00,0x02,0x00,0x00] + vpconflictd 512(%rdx){1to16}, %zmm25 + +// CHECK: vpconflictd -512(%rdx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x4a,0x80] + vpconflictd -512(%rdx){1to16}, %zmm25 + +// CHECK: vpconflictd -516(%rdx){1to16}, %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x8a,0xfc,0xfd,0xff,0xff] + vpconflictd -516(%rdx){1to16}, %zmm25 + +// CHECK: vpconflictd %zmm21, %zmm26 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0xc4,0xd5] + vpconflictd %zmm21, %zmm26 + +// CHECK: vpconflictd %zmm21, %zmm26 {%k4} +// CHECK: encoding: [0x62,0x22,0x7d,0x4c,0xc4,0xd5] + vpconflictd %zmm21, %zmm26 {%k4} + +// CHECK: vpconflictd %zmm21, %zmm26 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xcc,0xc4,0xd5] + vpconflictd %zmm21, %zmm26 {%k4} {z} + +// CHECK: vpconflictd (%rcx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x11] + vpconflictd (%rcx), %zmm26 + +// CHECK: vpconflictd 4660(%rax,%r14,8), %zmm26 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0xc4,0x94,0xf0,0x34,0x12,0x00,0x00] + vpconflictd 4660(%rax,%r14,8), %zmm26 + +// CHECK: vpconflictd (%rcx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x11] + vpconflictd (%rcx){1to16}, %zmm26 + +// CHECK: vpconflictd 4064(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x92,0xe0,0x0f,0x00,0x00] + vpconflictd 4064(%rdx), %zmm26 + +// CHECK: vpconflictd 4096(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x52,0x40] + vpconflictd 4096(%rdx), %zmm26 + +// CHECK: vpconflictd -4096(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x52,0xc0] + vpconflictd -4096(%rdx), %zmm26 + +// CHECK: vpconflictd -4128(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0xc4,0x92,0xe0,0xef,0xff,0xff] + vpconflictd -4128(%rdx), %zmm26 + +// CHECK: vpconflictd 508(%rdx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x52,0x7f] + vpconflictd 508(%rdx){1to16}, %zmm26 + +// CHECK: vpconflictd 512(%rdx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x92,0x00,0x02,0x00,0x00] + vpconflictd 512(%rdx){1to16}, %zmm26 + +// CHECK: vpconflictd -512(%rdx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x52,0x80] + vpconflictd -512(%rdx){1to16}, %zmm26 + +// CHECK: vpconflictd -516(%rdx){1to16}, %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x58,0xc4,0x92,0xfc,0xfd,0xff,0xff] + vpconflictd -516(%rdx){1to16}, %zmm26 Index: test/MC/X86/x86-64-avx512cd_vl.s =================================================================== --- test/MC/X86/x86-64-avx512cd_vl.s +++ test/MC/X86/x86-64-avx512cd_vl.s @@ -0,0 +1,898 @@ +// RUN: llvm-mc -triple x86_64-unknown-unknown -mcpu=knl -mattr=+avx512cd -mattr=+avx512vl --show-encoding %s | FileCheck %s + +// CHECK: vplzcntq %xmm20, %xmm18 +// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x44,0xd4] + vplzcntq %xmm20, %xmm18 + +// CHECK: vplzcntq %xmm20, %xmm18 {%k1} +// CHECK: encoding: [0x62,0xa2,0xfd,0x09,0x44,0xd4] + vplzcntq %xmm20, %xmm18 {%k1} + +// CHECK: vplzcntq %xmm20, %xmm18 {%k1} {z} +// CHECK: encoding: [0x62,0xa2,0xfd,0x89,0x44,0xd4] + vplzcntq %xmm20, %xmm18 {%k1} {z} + +// CHECK: vplzcntq (%rcx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x44,0x11] + vplzcntq (%rcx), %xmm18 + +// CHECK: vplzcntq 291(%rax,%r14,8), %xmm18 +// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0x44,0x94,0xf0,0x23,0x01,0x00,0x00] + vplzcntq 291(%rax,%r14,8), %xmm18 + +// CHECK: vplzcntq (%rcx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x44,0x11] + vplzcntq (%rcx){1to2}, %xmm18 + +// CHECK: vplzcntq 2032(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x44,0x52,0x7f] + vplzcntq 2032(%rdx), %xmm18 + +// CHECK: vplzcntq 2048(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x44,0x92,0x00,0x08,0x00,0x00] + vplzcntq 2048(%rdx), %xmm18 + +// CHECK: vplzcntq -2048(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x44,0x52,0x80] + vplzcntq -2048(%rdx), %xmm18 + +// CHECK: vplzcntq -2064(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x44,0x92,0xf0,0xf7,0xff,0xff] + vplzcntq -2064(%rdx), %xmm18 + +// CHECK: vplzcntq 1016(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x44,0x52,0x7f] + vplzcntq 1016(%rdx){1to2}, %xmm18 + +// CHECK: vplzcntq 1024(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x44,0x92,0x00,0x04,0x00,0x00] + vplzcntq 1024(%rdx){1to2}, %xmm18 + +// CHECK: vplzcntq -1024(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x44,0x52,0x80] + vplzcntq -1024(%rdx){1to2}, %xmm18 + +// CHECK: vplzcntq -1032(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0x44,0x92,0xf8,0xfb,0xff,0xff] + vplzcntq -1032(%rdx){1to2}, %xmm18 + +// CHECK: vplzcntq %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x44,0xee] + vplzcntq %ymm22, %ymm21 + +// CHECK: vplzcntq %ymm22, %ymm21 {%k7} +// CHECK: encoding: [0x62,0xa2,0xfd,0x2f,0x44,0xee] + vplzcntq %ymm22, %ymm21 {%k7} + +// CHECK: vplzcntq %ymm22, %ymm21 {%k7} {z} +// CHECK: encoding: [0x62,0xa2,0xfd,0xaf,0x44,0xee] + vplzcntq %ymm22, %ymm21 {%k7} {z} + +// CHECK: vplzcntq (%rcx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0x29] + vplzcntq (%rcx), %ymm21 + +// CHECK: vplzcntq 291(%rax,%r14,8), %ymm21 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x44,0xac,0xf0,0x23,0x01,0x00,0x00] + vplzcntq 291(%rax,%r14,8), %ymm21 + +// CHECK: vplzcntq (%rcx){1to4}, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0x29] + vplzcntq (%rcx){1to4}, %ymm21 + +// CHECK: vplzcntq 4064(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0x6a,0x7f] + vplzcntq 4064(%rdx), %ymm21 + +// CHECK: vplzcntq 4096(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0xaa,0x00,0x10,0x00,0x00] + vplzcntq 4096(%rdx), %ymm21 + +// CHECK: vplzcntq -4096(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0x6a,0x80] + vplzcntq -4096(%rdx), %ymm21 + +// CHECK: vplzcntq -4128(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0xaa,0xe0,0xef,0xff,0xff] + vplzcntq -4128(%rdx), %ymm21 + +// CHECK: vplzcntq 1016(%rdx){1to4}, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0x6a,0x7f] + vplzcntq 1016(%rdx){1to4}, %ymm21 + +// CHECK: vplzcntq 1024(%rdx){1to4}, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0xaa,0x00,0x04,0x00,0x00] + vplzcntq 1024(%rdx){1to4}, %ymm21 + +// CHECK: vplzcntq -1024(%rdx){1to4}, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0x6a,0x80] + vplzcntq -1024(%rdx){1to4}, %ymm21 + +// CHECK: vplzcntq -1032(%rdx){1to4}, %ymm21 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0xaa,0xf8,0xfb,0xff,0xff] + vplzcntq -1032(%rdx){1to4}, %ymm21 + +// CHECK: vplzcntq %xmm20, %xmm24 +// CHECK: encoding: [0x62,0x22,0xfd,0x08,0x44,0xc4] + vplzcntq %xmm20, %xmm24 + +// CHECK: vplzcntq %xmm20, %xmm24 {%k3} +// CHECK: encoding: [0x62,0x22,0xfd,0x0b,0x44,0xc4] + vplzcntq %xmm20, %xmm24 {%k3} + +// CHECK: vplzcntq %xmm20, %xmm24 {%k3} {z} +// CHECK: encoding: [0x62,0x22,0xfd,0x8b,0x44,0xc4] + vplzcntq %xmm20, %xmm24 {%k3} {z} + +// CHECK: vplzcntq (%rcx), %xmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x44,0x01] + vplzcntq (%rcx), %xmm24 + +// CHECK: vplzcntq 4660(%rax,%r14,8), %xmm24 +// CHECK: encoding: [0x62,0x22,0xfd,0x08,0x44,0x84,0xf0,0x34,0x12,0x00,0x00] + vplzcntq 4660(%rax,%r14,8), %xmm24 + +// CHECK: vplzcntq (%rcx){1to2}, %xmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x18,0x44,0x01] + vplzcntq (%rcx){1to2}, %xmm24 + +// CHECK: vplzcntq 2032(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x44,0x42,0x7f] + vplzcntq 2032(%rdx), %xmm24 + +// CHECK: vplzcntq 2048(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x44,0x82,0x00,0x08,0x00,0x00] + vplzcntq 2048(%rdx), %xmm24 + +// CHECK: vplzcntq -2048(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x44,0x42,0x80] + vplzcntq -2048(%rdx), %xmm24 + +// CHECK: vplzcntq -2064(%rdx), %xmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x44,0x82,0xf0,0xf7,0xff,0xff] + vplzcntq -2064(%rdx), %xmm24 + +// CHECK: vplzcntq 1016(%rdx){1to2}, %xmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x18,0x44,0x42,0x7f] + vplzcntq 1016(%rdx){1to2}, %xmm24 + +// CHECK: vplzcntq 1024(%rdx){1to2}, %xmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x18,0x44,0x82,0x00,0x04,0x00,0x00] + vplzcntq 1024(%rdx){1to2}, %xmm24 + +// CHECK: vplzcntq -1024(%rdx){1to2}, %xmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x18,0x44,0x42,0x80] + vplzcntq -1024(%rdx){1to2}, %xmm24 + +// CHECK: vplzcntq -1032(%rdx){1to2}, %xmm24 +// CHECK: encoding: [0x62,0x62,0xfd,0x18,0x44,0x82,0xf8,0xfb,0xff,0xff] + vplzcntq -1032(%rdx){1to2}, %xmm24 + +// CHECK: vplzcntq %ymm27, %ymm23 +// CHECK: encoding: [0x62,0x82,0xfd,0x28,0x44,0xfb] + vplzcntq %ymm27, %ymm23 + +// CHECK: vplzcntq %ymm27, %ymm23 {%k5} +// CHECK: encoding: [0x62,0x82,0xfd,0x2d,0x44,0xfb] + vplzcntq %ymm27, %ymm23 {%k5} + +// CHECK: vplzcntq %ymm27, %ymm23 {%k5} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0xad,0x44,0xfb] + vplzcntq %ymm27, %ymm23 {%k5} {z} + +// CHECK: vplzcntq (%rcx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0x39] + vplzcntq (%rcx), %ymm23 + +// CHECK: vplzcntq 4660(%rax,%r14,8), %ymm23 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x44,0xbc,0xf0,0x34,0x12,0x00,0x00] + vplzcntq 4660(%rax,%r14,8), %ymm23 + +// CHECK: vplzcntq (%rcx){1to4}, %ymm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0x39] + vplzcntq (%rcx){1to4}, %ymm23 + +// CHECK: vplzcntq 4064(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0x7a,0x7f] + vplzcntq 4064(%rdx), %ymm23 + +// CHECK: vplzcntq 4096(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0xba,0x00,0x10,0x00,0x00] + vplzcntq 4096(%rdx), %ymm23 + +// CHECK: vplzcntq -4096(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0x7a,0x80] + vplzcntq -4096(%rdx), %ymm23 + +// CHECK: vplzcntq -4128(%rdx), %ymm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x44,0xba,0xe0,0xef,0xff,0xff] + vplzcntq -4128(%rdx), %ymm23 + +// CHECK: vplzcntq 1016(%rdx){1to4}, %ymm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0x7a,0x7f] + vplzcntq 1016(%rdx){1to4}, %ymm23 + +// CHECK: vplzcntq 1024(%rdx){1to4}, %ymm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0xba,0x00,0x04,0x00,0x00] + vplzcntq 1024(%rdx){1to4}, %ymm23 + +// CHECK: vplzcntq -1024(%rdx){1to4}, %ymm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0x7a,0x80] + vplzcntq -1024(%rdx){1to4}, %ymm23 + +// CHECK: vplzcntq -1032(%rdx){1to4}, %ymm23 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0x44,0xba,0xf8,0xfb,0xff,0xff] + vplzcntq -1032(%rdx){1to4}, %ymm23 + +// CHECK: vplzcntd %xmm26, %xmm26 +// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x44,0xd2] + vplzcntd %xmm26, %xmm26 + +// CHECK: vplzcntd %xmm26, %xmm26 {%k4} +// CHECK: encoding: [0x62,0x02,0x7d,0x0c,0x44,0xd2] + vplzcntd %xmm26, %xmm26 {%k4} + +// CHECK: vplzcntd %xmm26, %xmm26 {%k4} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0x8c,0x44,0xd2] + vplzcntd %xmm26, %xmm26 {%k4} {z} + +// CHECK: vplzcntd (%rcx), %xmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x11] + vplzcntd (%rcx), %xmm26 + +// CHECK: vplzcntd 291(%rax,%r14,8), %xmm26 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x44,0x94,0xf0,0x23,0x01,0x00,0x00] + vplzcntd 291(%rax,%r14,8), %xmm26 + +// CHECK: vplzcntd (%rcx){1to4}, %xmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x11] + vplzcntd (%rcx){1to4}, %xmm26 + +// CHECK: vplzcntd 2032(%rdx), %xmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x52,0x7f] + vplzcntd 2032(%rdx), %xmm26 + +// CHECK: vplzcntd 2048(%rdx), %xmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x92,0x00,0x08,0x00,0x00] + vplzcntd 2048(%rdx), %xmm26 + +// CHECK: vplzcntd -2048(%rdx), %xmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x52,0x80] + vplzcntd -2048(%rdx), %xmm26 + +// CHECK: vplzcntd -2064(%rdx), %xmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x92,0xf0,0xf7,0xff,0xff] + vplzcntd -2064(%rdx), %xmm26 + +// CHECK: vplzcntd 508(%rdx){1to4}, %xmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x52,0x7f] + vplzcntd 508(%rdx){1to4}, %xmm26 + +// CHECK: vplzcntd 512(%rdx){1to4}, %xmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x92,0x00,0x02,0x00,0x00] + vplzcntd 512(%rdx){1to4}, %xmm26 + +// CHECK: vplzcntd -512(%rdx){1to4}, %xmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x52,0x80] + vplzcntd -512(%rdx){1to4}, %xmm26 + +// CHECK: vplzcntd -516(%rdx){1to4}, %xmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x92,0xfc,0xfd,0xff,0xff] + vplzcntd -516(%rdx){1to4}, %xmm26 + +// CHECK: vplzcntd %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x44,0xce] + vplzcntd %ymm22, %ymm25 + +// CHECK: vplzcntd %ymm22, %ymm25 {%k2} +// CHECK: encoding: [0x62,0x22,0x7d,0x2a,0x44,0xce] + vplzcntd %ymm22, %ymm25 {%k2} + +// CHECK: vplzcntd %ymm22, %ymm25 {%k2} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xaa,0x44,0xce] + vplzcntd %ymm22, %ymm25 {%k2} {z} + +// CHECK: vplzcntd (%rcx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x44,0x09] + vplzcntd (%rcx), %ymm25 + +// CHECK: vplzcntd 291(%rax,%r14,8), %ymm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x44,0x8c,0xf0,0x23,0x01,0x00,0x00] + vplzcntd 291(%rax,%r14,8), %ymm25 + +// CHECK: vplzcntd (%rcx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x44,0x09] + vplzcntd (%rcx){1to8}, %ymm25 + +// CHECK: vplzcntd 4064(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x44,0x4a,0x7f] + vplzcntd 4064(%rdx), %ymm25 + +// CHECK: vplzcntd 4096(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x44,0x8a,0x00,0x10,0x00,0x00] + vplzcntd 4096(%rdx), %ymm25 + +// CHECK: vplzcntd -4096(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x44,0x4a,0x80] + vplzcntd -4096(%rdx), %ymm25 + +// CHECK: vplzcntd -4128(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x44,0x8a,0xe0,0xef,0xff,0xff] + vplzcntd -4128(%rdx), %ymm25 + +// CHECK: vplzcntd 508(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x44,0x4a,0x7f] + vplzcntd 508(%rdx){1to8}, %ymm25 + +// CHECK: vplzcntd 512(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x44,0x8a,0x00,0x02,0x00,0x00] + vplzcntd 512(%rdx){1to8}, %ymm25 + +// CHECK: vplzcntd -512(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x44,0x4a,0x80] + vplzcntd -512(%rdx){1to8}, %ymm25 + +// CHECK: vplzcntd -516(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0x44,0x8a,0xfc,0xfd,0xff,0xff] + vplzcntd -516(%rdx){1to8}, %ymm25 + +// CHECK: vplzcntd %xmm22, %xmm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x44,0xf6] + vplzcntd %xmm22, %xmm30 + +// CHECK: vplzcntd %xmm22, %xmm30 {%k7} +// CHECK: encoding: [0x62,0x22,0x7d,0x0f,0x44,0xf6] + vplzcntd %xmm22, %xmm30 {%k7} + +// CHECK: vplzcntd %xmm22, %xmm30 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0x8f,0x44,0xf6] + vplzcntd %xmm22, %xmm30 {%k7} {z} + +// CHECK: vplzcntd (%rcx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x31] + vplzcntd (%rcx), %xmm30 + +// CHECK: vplzcntd 4660(%rax,%r14,8), %xmm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x44,0xb4,0xf0,0x34,0x12,0x00,0x00] + vplzcntd 4660(%rax,%r14,8), %xmm30 + +// CHECK: vplzcntd (%rcx){1to4}, %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x31] + vplzcntd (%rcx){1to4}, %xmm30 + +// CHECK: vplzcntd 2032(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x72,0x7f] + vplzcntd 2032(%rdx), %xmm30 + +// CHECK: vplzcntd 2048(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0xb2,0x00,0x08,0x00,0x00] + vplzcntd 2048(%rdx), %xmm30 + +// CHECK: vplzcntd -2048(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0x72,0x80] + vplzcntd -2048(%rdx), %xmm30 + +// CHECK: vplzcntd -2064(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x44,0xb2,0xf0,0xf7,0xff,0xff] + vplzcntd -2064(%rdx), %xmm30 + +// CHECK: vplzcntd 508(%rdx){1to4}, %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x72,0x7f] + vplzcntd 508(%rdx){1to4}, %xmm30 + +// CHECK: vplzcntd 512(%rdx){1to4}, %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0xb2,0x00,0x02,0x00,0x00] + vplzcntd 512(%rdx){1to4}, %xmm30 + +// CHECK: vplzcntd -512(%rdx){1to4}, %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0x72,0x80] + vplzcntd -512(%rdx){1to4}, %xmm30 + +// CHECK: vplzcntd -516(%rdx){1to4}, %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0x44,0xb2,0xfc,0xfd,0xff,0xff] + vplzcntd -516(%rdx){1to4}, %xmm30 + +// CHECK: vplzcntd %ymm22, %ymm21 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x44,0xee] + vplzcntd %ymm22, %ymm21 + +// CHECK: vplzcntd %ymm22, %ymm21 {%k3} +// CHECK: encoding: [0x62,0xa2,0x7d,0x2b,0x44,0xee] + vplzcntd %ymm22, %ymm21 {%k3} + +// CHECK: vplzcntd %ymm22, %ymm21 {%k3} {z} +// CHECK: encoding: [0x62,0xa2,0x7d,0xab,0x44,0xee] + vplzcntd %ymm22, %ymm21 {%k3} {z} + +// CHECK: vplzcntd (%rcx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x44,0x29] + vplzcntd (%rcx), %ymm21 + +// CHECK: vplzcntd 4660(%rax,%r14,8), %ymm21 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x44,0xac,0xf0,0x34,0x12,0x00,0x00] + vplzcntd 4660(%rax,%r14,8), %ymm21 + +// CHECK: vplzcntd (%rcx){1to8}, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x38,0x44,0x29] + vplzcntd (%rcx){1to8}, %ymm21 + +// CHECK: vplzcntd 4064(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x44,0x6a,0x7f] + vplzcntd 4064(%rdx), %ymm21 + +// CHECK: vplzcntd 4096(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x44,0xaa,0x00,0x10,0x00,0x00] + vplzcntd 4096(%rdx), %ymm21 + +// CHECK: vplzcntd -4096(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x44,0x6a,0x80] + vplzcntd -4096(%rdx), %ymm21 + +// CHECK: vplzcntd -4128(%rdx), %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x44,0xaa,0xe0,0xef,0xff,0xff] + vplzcntd -4128(%rdx), %ymm21 + +// CHECK: vplzcntd 508(%rdx){1to8}, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x38,0x44,0x6a,0x7f] + vplzcntd 508(%rdx){1to8}, %ymm21 + +// CHECK: vplzcntd 512(%rdx){1to8}, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x38,0x44,0xaa,0x00,0x02,0x00,0x00] + vplzcntd 512(%rdx){1to8}, %ymm21 + +// CHECK: vplzcntd -512(%rdx){1to8}, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x38,0x44,0x6a,0x80] + vplzcntd -512(%rdx){1to8}, %ymm21 + +// CHECK: vplzcntd -516(%rdx){1to8}, %ymm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x38,0x44,0xaa,0xfc,0xfd,0xff,0xff] + vplzcntd -516(%rdx){1to8}, %ymm21 + +// CHECK: vpconflictq %xmm24, %xmm19 +// CHECK: encoding: [0x62,0x82,0xfd,0x08,0xc4,0xd8] + vpconflictq %xmm24, %xmm19 + +// CHECK: vpconflictq %xmm24, %xmm19 {%k7} +// CHECK: encoding: [0x62,0x82,0xfd,0x0f,0xc4,0xd8] + vpconflictq %xmm24, %xmm19 {%k7} + +// CHECK: vpconflictq %xmm24, %xmm19 {%k7} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0x8f,0xc4,0xd8] + vpconflictq %xmm24, %xmm19 {%k7} {z} + +// CHECK: vpconflictq (%rcx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x19] + vpconflictq (%rcx), %xmm19 + +// CHECK: vpconflictq 291(%rax,%r14,8), %xmm19 +// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0xc4,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpconflictq 291(%rax,%r14,8), %xmm19 + +// CHECK: vpconflictq (%rcx){1to2}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x19] + vpconflictq (%rcx){1to2}, %xmm19 + +// CHECK: vpconflictq 2032(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x5a,0x7f] + vpconflictq 2032(%rdx), %xmm19 + +// CHECK: vpconflictq 2048(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x9a,0x00,0x08,0x00,0x00] + vpconflictq 2048(%rdx), %xmm19 + +// CHECK: vpconflictq -2048(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x5a,0x80] + vpconflictq -2048(%rdx), %xmm19 + +// CHECK: vpconflictq -2064(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x9a,0xf0,0xf7,0xff,0xff] + vpconflictq -2064(%rdx), %xmm19 + +// CHECK: vpconflictq 1016(%rdx){1to2}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x5a,0x7f] + vpconflictq 1016(%rdx){1to2}, %xmm19 + +// CHECK: vpconflictq 1024(%rdx){1to2}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x9a,0x00,0x04,0x00,0x00] + vpconflictq 1024(%rdx){1to2}, %xmm19 + +// CHECK: vpconflictq -1024(%rdx){1to2}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x5a,0x80] + vpconflictq -1024(%rdx){1to2}, %xmm19 + +// CHECK: vpconflictq -1032(%rdx){1to2}, %xmm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x9a,0xf8,0xfb,0xff,0xff] + vpconflictq -1032(%rdx){1to2}, %xmm19 + +// CHECK: vpconflictq %ymm25, %ymm20 +// CHECK: encoding: [0x62,0x82,0xfd,0x28,0xc4,0xe1] + vpconflictq %ymm25, %ymm20 + +// CHECK: vpconflictq %ymm25, %ymm20 {%k6} +// CHECK: encoding: [0x62,0x82,0xfd,0x2e,0xc4,0xe1] + vpconflictq %ymm25, %ymm20 {%k6} + +// CHECK: vpconflictq %ymm25, %ymm20 {%k6} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0xae,0xc4,0xe1] + vpconflictq %ymm25, %ymm20 {%k6} {z} + +// CHECK: vpconflictq (%rcx), %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x21] + vpconflictq (%rcx), %ymm20 + +// CHECK: vpconflictq 291(%rax,%r14,8), %ymm20 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0xc4,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpconflictq 291(%rax,%r14,8), %ymm20 + +// CHECK: vpconflictq (%rcx){1to4}, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x21] + vpconflictq (%rcx){1to4}, %ymm20 + +// CHECK: vpconflictq 4064(%rdx), %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x62,0x7f] + vpconflictq 4064(%rdx), %ymm20 + +// CHECK: vpconflictq 4096(%rdx), %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0xa2,0x00,0x10,0x00,0x00] + vpconflictq 4096(%rdx), %ymm20 + +// CHECK: vpconflictq -4096(%rdx), %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x62,0x80] + vpconflictq -4096(%rdx), %ymm20 + +// CHECK: vpconflictq -4128(%rdx), %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0xa2,0xe0,0xef,0xff,0xff] + vpconflictq -4128(%rdx), %ymm20 + +// CHECK: vpconflictq 1016(%rdx){1to4}, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x62,0x7f] + vpconflictq 1016(%rdx){1to4}, %ymm20 + +// CHECK: vpconflictq 1024(%rdx){1to4}, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0xa2,0x00,0x04,0x00,0x00] + vpconflictq 1024(%rdx){1to4}, %ymm20 + +// CHECK: vpconflictq -1024(%rdx){1to4}, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x62,0x80] + vpconflictq -1024(%rdx){1to4}, %ymm20 + +// CHECK: vpconflictq -1032(%rdx){1to4}, %ymm20 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0xa2,0xf8,0xfb,0xff,0xff] + vpconflictq -1032(%rdx){1to4}, %ymm20 + +// CHECK: vpconflictq %xmm27, %xmm18 +// CHECK: encoding: [0x62,0x82,0xfd,0x08,0xc4,0xd3] + vpconflictq %xmm27, %xmm18 + +// CHECK: vpconflictq %xmm27, %xmm18 {%k4} +// CHECK: encoding: [0x62,0x82,0xfd,0x0c,0xc4,0xd3] + vpconflictq %xmm27, %xmm18 {%k4} + +// CHECK: vpconflictq %xmm27, %xmm18 {%k4} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0x8c,0xc4,0xd3] + vpconflictq %xmm27, %xmm18 {%k4} {z} + +// CHECK: vpconflictq (%rcx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x11] + vpconflictq (%rcx), %xmm18 + +// CHECK: vpconflictq 4660(%rax,%r14,8), %xmm18 +// CHECK: encoding: [0x62,0xa2,0xfd,0x08,0xc4,0x94,0xf0,0x34,0x12,0x00,0x00] + vpconflictq 4660(%rax,%r14,8), %xmm18 + +// CHECK: vpconflictq (%rcx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x11] + vpconflictq (%rcx){1to2}, %xmm18 + +// CHECK: vpconflictq 2032(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x52,0x7f] + vpconflictq 2032(%rdx), %xmm18 + +// CHECK: vpconflictq 2048(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x92,0x00,0x08,0x00,0x00] + vpconflictq 2048(%rdx), %xmm18 + +// CHECK: vpconflictq -2048(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x52,0x80] + vpconflictq -2048(%rdx), %xmm18 + +// CHECK: vpconflictq -2064(%rdx), %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0xc4,0x92,0xf0,0xf7,0xff,0xff] + vpconflictq -2064(%rdx), %xmm18 + +// CHECK: vpconflictq 1016(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x52,0x7f] + vpconflictq 1016(%rdx){1to2}, %xmm18 + +// CHECK: vpconflictq 1024(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x92,0x00,0x04,0x00,0x00] + vpconflictq 1024(%rdx){1to2}, %xmm18 + +// CHECK: vpconflictq -1024(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x52,0x80] + vpconflictq -1024(%rdx){1to2}, %xmm18 + +// CHECK: vpconflictq -1032(%rdx){1to2}, %xmm18 +// CHECK: encoding: [0x62,0xe2,0xfd,0x18,0xc4,0x92,0xf8,0xfb,0xff,0xff] + vpconflictq -1032(%rdx){1to2}, %xmm18 + +// CHECK: vpconflictq %ymm21, %ymm17 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0xc4,0xcd] + vpconflictq %ymm21, %ymm17 + +// CHECK: vpconflictq %ymm21, %ymm17 {%k6} +// CHECK: encoding: [0x62,0xa2,0xfd,0x2e,0xc4,0xcd] + vpconflictq %ymm21, %ymm17 {%k6} + +// CHECK: vpconflictq %ymm21, %ymm17 {%k6} {z} +// CHECK: encoding: [0x62,0xa2,0xfd,0xae,0xc4,0xcd] + vpconflictq %ymm21, %ymm17 {%k6} {z} + +// CHECK: vpconflictq (%rcx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x09] + vpconflictq (%rcx), %ymm17 + +// CHECK: vpconflictq 4660(%rax,%r14,8), %ymm17 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0xc4,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpconflictq 4660(%rax,%r14,8), %ymm17 + +// CHECK: vpconflictq (%rcx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x09] + vpconflictq (%rcx){1to4}, %ymm17 + +// CHECK: vpconflictq 4064(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x4a,0x7f] + vpconflictq 4064(%rdx), %ymm17 + +// CHECK: vpconflictq 4096(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x8a,0x00,0x10,0x00,0x00] + vpconflictq 4096(%rdx), %ymm17 + +// CHECK: vpconflictq -4096(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x4a,0x80] + vpconflictq -4096(%rdx), %ymm17 + +// CHECK: vpconflictq -4128(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0xc4,0x8a,0xe0,0xef,0xff,0xff] + vpconflictq -4128(%rdx), %ymm17 + +// CHECK: vpconflictq 1016(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x4a,0x7f] + vpconflictq 1016(%rdx){1to4}, %ymm17 + +// CHECK: vpconflictq 1024(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x8a,0x00,0x04,0x00,0x00] + vpconflictq 1024(%rdx){1to4}, %ymm17 + +// CHECK: vpconflictq -1024(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x4a,0x80] + vpconflictq -1024(%rdx){1to4}, %ymm17 + +// CHECK: vpconflictq -1032(%rdx){1to4}, %ymm17 +// CHECK: encoding: [0x62,0xe2,0xfd,0x38,0xc4,0x8a,0xf8,0xfb,0xff,0xff] + vpconflictq -1032(%rdx){1to4}, %ymm17 + +// CHECK: vpconflictd %xmm27, %xmm21 +// CHECK: encoding: [0x62,0x82,0x7d,0x08,0xc4,0xeb] + vpconflictd %xmm27, %xmm21 + +// CHECK: vpconflictd %xmm27, %xmm21 {%k5} +// CHECK: encoding: [0x62,0x82,0x7d,0x0d,0xc4,0xeb] + vpconflictd %xmm27, %xmm21 {%k5} + +// CHECK: vpconflictd %xmm27, %xmm21 {%k5} {z} +// CHECK: encoding: [0x62,0x82,0x7d,0x8d,0xc4,0xeb] + vpconflictd %xmm27, %xmm21 {%k5} {z} + +// CHECK: vpconflictd (%rcx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0xc4,0x29] + vpconflictd (%rcx), %xmm21 + +// CHECK: vpconflictd 291(%rax,%r14,8), %xmm21 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0xc4,0xac,0xf0,0x23,0x01,0x00,0x00] + vpconflictd 291(%rax,%r14,8), %xmm21 + +// CHECK: vpconflictd (%rcx){1to4}, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0xc4,0x29] + vpconflictd (%rcx){1to4}, %xmm21 + +// CHECK: vpconflictd 2032(%rdx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0xc4,0x6a,0x7f] + vpconflictd 2032(%rdx), %xmm21 + +// CHECK: vpconflictd 2048(%rdx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0xc4,0xaa,0x00,0x08,0x00,0x00] + vpconflictd 2048(%rdx), %xmm21 + +// CHECK: vpconflictd -2048(%rdx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0xc4,0x6a,0x80] + vpconflictd -2048(%rdx), %xmm21 + +// CHECK: vpconflictd -2064(%rdx), %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0xc4,0xaa,0xf0,0xf7,0xff,0xff] + vpconflictd -2064(%rdx), %xmm21 + +// CHECK: vpconflictd 508(%rdx){1to4}, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0xc4,0x6a,0x7f] + vpconflictd 508(%rdx){1to4}, %xmm21 + +// CHECK: vpconflictd 512(%rdx){1to4}, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0xc4,0xaa,0x00,0x02,0x00,0x00] + vpconflictd 512(%rdx){1to4}, %xmm21 + +// CHECK: vpconflictd -512(%rdx){1to4}, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0xc4,0x6a,0x80] + vpconflictd -512(%rdx){1to4}, %xmm21 + +// CHECK: vpconflictd -516(%rdx){1to4}, %xmm21 +// CHECK: encoding: [0x62,0xe2,0x7d,0x18,0xc4,0xaa,0xfc,0xfd,0xff,0xff] + vpconflictd -516(%rdx){1to4}, %xmm21 + +// CHECK: vpconflictd %ymm19, %ymm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0xc4,0xcb] + vpconflictd %ymm19, %ymm25 + +// CHECK: vpconflictd %ymm19, %ymm25 {%k4} +// CHECK: encoding: [0x62,0x22,0x7d,0x2c,0xc4,0xcb] + vpconflictd %ymm19, %ymm25 {%k4} + +// CHECK: vpconflictd %ymm19, %ymm25 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xac,0xc4,0xcb] + vpconflictd %ymm19, %ymm25 {%k4} {z} + +// CHECK: vpconflictd (%rcx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x09] + vpconflictd (%rcx), %ymm25 + +// CHECK: vpconflictd 291(%rax,%r14,8), %ymm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0xc4,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpconflictd 291(%rax,%r14,8), %ymm25 + +// CHECK: vpconflictd (%rcx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x09] + vpconflictd (%rcx){1to8}, %ymm25 + +// CHECK: vpconflictd 4064(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x4a,0x7f] + vpconflictd 4064(%rdx), %ymm25 + +// CHECK: vpconflictd 4096(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x8a,0x00,0x10,0x00,0x00] + vpconflictd 4096(%rdx), %ymm25 + +// CHECK: vpconflictd -4096(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x4a,0x80] + vpconflictd -4096(%rdx), %ymm25 + +// CHECK: vpconflictd -4128(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x8a,0xe0,0xef,0xff,0xff] + vpconflictd -4128(%rdx), %ymm25 + +// CHECK: vpconflictd 508(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x4a,0x7f] + vpconflictd 508(%rdx){1to8}, %ymm25 + +// CHECK: vpconflictd 512(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x8a,0x00,0x02,0x00,0x00] + vpconflictd 512(%rdx){1to8}, %ymm25 + +// CHECK: vpconflictd -512(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x4a,0x80] + vpconflictd -512(%rdx){1to8}, %ymm25 + +// CHECK: vpconflictd -516(%rdx){1to8}, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x8a,0xfc,0xfd,0xff,0xff] + vpconflictd -516(%rdx){1to8}, %ymm25 + +// CHECK: vpconflictd %xmm28, %xmm27 +// CHECK: encoding: [0x62,0x02,0x7d,0x08,0xc4,0xdc] + vpconflictd %xmm28, %xmm27 + +// CHECK: vpconflictd %xmm28, %xmm27 {%k3} +// CHECK: encoding: [0x62,0x02,0x7d,0x0b,0xc4,0xdc] + vpconflictd %xmm28, %xmm27 {%k3} + +// CHECK: vpconflictd %xmm28, %xmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0x8b,0xc4,0xdc] + vpconflictd %xmm28, %xmm27 {%k3} {z} + +// CHECK: vpconflictd (%rcx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0xc4,0x19] + vpconflictd (%rcx), %xmm27 + +// CHECK: vpconflictd 4660(%rax,%r14,8), %xmm27 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0xc4,0x9c,0xf0,0x34,0x12,0x00,0x00] + vpconflictd 4660(%rax,%r14,8), %xmm27 + +// CHECK: vpconflictd (%rcx){1to4}, %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0xc4,0x19] + vpconflictd (%rcx){1to4}, %xmm27 + +// CHECK: vpconflictd 2032(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0xc4,0x5a,0x7f] + vpconflictd 2032(%rdx), %xmm27 + +// CHECK: vpconflictd 2048(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0xc4,0x9a,0x00,0x08,0x00,0x00] + vpconflictd 2048(%rdx), %xmm27 + +// CHECK: vpconflictd -2048(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0xc4,0x5a,0x80] + vpconflictd -2048(%rdx), %xmm27 + +// CHECK: vpconflictd -2064(%rdx), %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0xc4,0x9a,0xf0,0xf7,0xff,0xff] + vpconflictd -2064(%rdx), %xmm27 + +// CHECK: vpconflictd 508(%rdx){1to4}, %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0xc4,0x5a,0x7f] + vpconflictd 508(%rdx){1to4}, %xmm27 + +// CHECK: vpconflictd 512(%rdx){1to4}, %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0xc4,0x9a,0x00,0x02,0x00,0x00] + vpconflictd 512(%rdx){1to4}, %xmm27 + +// CHECK: vpconflictd -512(%rdx){1to4}, %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0xc4,0x5a,0x80] + vpconflictd -512(%rdx){1to4}, %xmm27 + +// CHECK: vpconflictd -516(%rdx){1to4}, %xmm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x18,0xc4,0x9a,0xfc,0xfd,0xff,0xff] + vpconflictd -516(%rdx){1to4}, %xmm27 + +// CHECK: vpconflictd %ymm21, %ymm26 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0xc4,0xd5] + vpconflictd %ymm21, %ymm26 + +// CHECK: vpconflictd %ymm21, %ymm26 {%k4} +// CHECK: encoding: [0x62,0x22,0x7d,0x2c,0xc4,0xd5] + vpconflictd %ymm21, %ymm26 {%k4} + +// CHECK: vpconflictd %ymm21, %ymm26 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xac,0xc4,0xd5] + vpconflictd %ymm21, %ymm26 {%k4} {z} + +// CHECK: vpconflictd (%rcx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x11] + vpconflictd (%rcx), %ymm26 + +// CHECK: vpconflictd 4660(%rax,%r14,8), %ymm26 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0xc4,0x94,0xf0,0x34,0x12,0x00,0x00] + vpconflictd 4660(%rax,%r14,8), %ymm26 + +// CHECK: vpconflictd (%rcx){1to8}, %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x11] + vpconflictd (%rcx){1to8}, %ymm26 + +// CHECK: vpconflictd 4064(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x52,0x7f] + vpconflictd 4064(%rdx), %ymm26 + +// CHECK: vpconflictd 4096(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x92,0x00,0x10,0x00,0x00] + vpconflictd 4096(%rdx), %ymm26 + +// CHECK: vpconflictd -4096(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x52,0x80] + vpconflictd -4096(%rdx), %ymm26 + +// CHECK: vpconflictd -4128(%rdx), %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0xc4,0x92,0xe0,0xef,0xff,0xff] + vpconflictd -4128(%rdx), %ymm26 + +// CHECK: vpconflictd 508(%rdx){1to8}, %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x52,0x7f] + vpconflictd 508(%rdx){1to8}, %ymm26 + +// CHECK: vpconflictd 512(%rdx){1to8}, %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x92,0x00,0x02,0x00,0x00] + vpconflictd 512(%rdx){1to8}, %ymm26 + +// CHECK: vpconflictd -512(%rdx){1to8}, %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x52,0x80] + vpconflictd -512(%rdx){1to8}, %ymm26 + +// CHECK: vpconflictd -516(%rdx){1to8}, %ymm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x38,0xc4,0x92,0xfc,0xfd,0xff,0xff] + vpconflictd -516(%rdx){1to8}, %ymm26 +