Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td
===================================================================
--- llvm/trunk/include/llvm/IR/IntrinsicsX86.td
+++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td
@@ -7048,6 +7048,82 @@
                      [llvm_ptr_ty, llvm_v32i16_ty, llvm_i32_ty],
                      [IntrReadWriteArgMem]>;
 }
+
+// Bitwise ternary logic
+let TargetPrefix = "x86" in {
+  def int_x86_avx512_mask_pternlog_d_128 :
+          GCCBuiltin<"__builtin_ia32_pternlogd128_mask">,
+          Intrinsic<[llvm_v4i32_ty],
+                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
+                    llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_pternlog_d_128 :
+          GCCBuiltin<"__builtin_ia32_pternlogd128_maskz">,
+          Intrinsic<[llvm_v4i32_ty],
+                    [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i32_ty,
+                    llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pternlog_d_256 :
+          GCCBuiltin<"__builtin_ia32_pternlogd256_mask">,
+          Intrinsic<[llvm_v8i32_ty],
+                    [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
+                    llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_pternlog_d_256 :
+          GCCBuiltin<"__builtin_ia32_pternlogd256_maskz">,
+          Intrinsic<[llvm_v8i32_ty],
+                    [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i32_ty,
+                    llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pternlog_d_512 :
+          GCCBuiltin<"__builtin_ia32_pternlogd512_mask">,
+          Intrinsic<[llvm_v16i32_ty],
+                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
+                    llvm_i16_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_pternlog_d_512 :
+          GCCBuiltin<"__builtin_ia32_pternlogd512_maskz">,
+          Intrinsic<[llvm_v16i32_ty],
+                    [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i32_ty,
+                    llvm_i16_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pternlog_q_128 :
+          GCCBuiltin<"__builtin_ia32_pternlogq128_mask">,
+          Intrinsic<[llvm_v2i64_ty],
+                    [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
+                    llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_pternlog_q_128 :
+          GCCBuiltin<"__builtin_ia32_pternlogq128_maskz">,
+          Intrinsic<[llvm_v2i64_ty],
+                    [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i32_ty,
+                    llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pternlog_q_256 :
+          GCCBuiltin<"__builtin_ia32_pternlogq256_mask">,
+          Intrinsic<[llvm_v4i64_ty],
+                    [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
+                    llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_pternlog_q_256 :
+          GCCBuiltin<"__builtin_ia32_pternlogq256_maskz">,
+          Intrinsic<[llvm_v4i64_ty],
+                    [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i32_ty,
+                    llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_mask_pternlog_q_512 :
+          GCCBuiltin<"__builtin_ia32_pternlogq512_mask">,
+          Intrinsic<[llvm_v8i64_ty],
+                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
+                    llvm_i8_ty], [IntrNoMem]>;
+
+  def int_x86_avx512_maskz_pternlog_q_512 :
+          GCCBuiltin<"__builtin_ia32_pternlogq512_maskz">,
+          Intrinsic<[llvm_v8i64_ty],
+                    [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i32_ty,
+                    llvm_i8_ty], [IntrNoMem]>;
+}
+
 // Misc.
let TargetPrefix = "x86" in { def int_x86_avx512_mask_cmp_ps_512 : Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -389,9 +389,11 @@ VPERMIV3, VPERMI, VPERM2X128, - //Fix Up Special Packed Float32/64 values + // Bitwise ternary logic + VPTERNLOG, + // Fix Up Special Packed Float32/64 values VFIXUPIMM, - //Range Restriction Calculation For Packed Pairs of Float32/64 values + // Range Restriction Calculation For Packed Pairs of Float32/64 values VRANGE, // Reduce - Perform Reduction Transformation on scalar\packed FP VREDUCE, Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -16252,6 +16252,23 @@ Src1, Src2, Src3), Mask, PassThru, Subtarget, DAG); } + case TERLOG_OP_MASK: + case TERLOG_OP_MASKZ: { + SDValue Src1 = Op.getOperand(1); + SDValue Src2 = Op.getOperand(2); + SDValue Src3 = Op.getOperand(3); + SDValue Src4 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Op.getOperand(4)); + SDValue Mask = Op.getOperand(5); + EVT VT = Op.getValueType(); + SDValue PassThru = Src1; + // Set PassThru element. + if (IntrData->Type == TERLOG_OP_MASKZ) + PassThru = getZeroVector(VT, Subtarget, DAG, dl); + + return getVectorMaskingNode(DAG.getNode(IntrData->Opc0, dl, VT, + Src1, Src2, Src3, Src4), + Mask, PassThru, Subtarget, DAG); + } case FPCLASS: { // FPclass intrinsics with mask SDValue Src1 = Op.getOperand(1); @@ -19915,6 +19932,7 @@ case X86ISD::VPERMV3: return "X86ISD::VPERMV3"; case X86ISD::VPERMIV3: return "X86ISD::VPERMIV3"; case X86ISD::VPERMI: return "X86ISD::VPERMI"; + case X86ISD::VPTERNLOG: return "X86ISD::VPTERNLOG"; case X86ISD::VFIXUPIMM: return "X86ISD::VFIXUPIMM"; case X86ISD::VRANGE: return "X86ISD::VRANGE"; case X86ISD::PMULUDQ: return "X86ISD::PMULUDQ"; Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -7148,3 +7148,48 @@ defm VPSADBW : avx512_psadbw_packed_all<0xf6, X86psadbw, "vpsadbw", HasBWI>, EVEX_4V; + +multiclass avx512_ternlog opc, string OpcodeStr, SDNode OpNode, + X86VectorVTInfo _>{ + let Constraints = "$src1 = $dst" in { + defm rri : AVX512_maskable_3src, AVX512AIi8Base, EVEX_4V; + let mayLoad = 1 in { + defm rmi : AVX512_maskable_3src, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + defm rmbi : AVX512_maskable_3src, EVEX_B, + AVX512AIi8Base, EVEX_4V, EVEX_CD8<_.EltSize, CD8VF>; + } + }// Constraints = "$src1 = $dst" +} + +multiclass avx512_common_ternlog{ + let Predicates = [HasAVX512] in + defm Z : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info512>, EVEX_V512; + let Predicates = [HasAVX512, HasVLX] in { + defm Z128 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info128>, EVEX_V128; + defm Z256 : avx512_ternlog<0x25, OpcodeStr, X86vpternlog, _.info256>, EVEX_V256; + } +} + +defm VPTERNLOGD : avx512_common_ternlog<"vpternlogd", avx512vl_i32_info>; +defm VPTERNLOGQ : avx512_common_ternlog<"vpternlogq", avx512vl_i64_info>, VEX_W; + Index: llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrFragmentsSIMD.td +++ 
@@ -293,6 +293,10 @@
 def SDTBlend : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                              SDTCisSameAs<1,2>, SDTCisVT<3, i8>]>;
 
+def SDTTernlog : SDTypeProfile<1, 4, [SDTCisVec<0>, SDTCisSameAs<0,1>,
+                               SDTCisSameAs<0,2>, SDTCisSameAs<0,3>,
+                               SDTCisInt<4>]>;
+
 def SDTFPBinOpRound : SDTypeProfile<1, 3, [  // fadd_round, fmul_round, etc.
   SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, SDTCisFP<0>, SDTCisInt<3>]>;
 
@@ -353,6 +357,7 @@
 def X86VPermi : SDNode<"X86ISD::VPERMI", SDTShuff2OpI>;
 def X86VPermv3 : SDNode<"X86ISD::VPERMV3", SDTShuff3Op>;
 def X86VPermiv3 : SDNode<"X86ISD::VPERMIV3", SDTShuff3Op>;
+def X86vpternlog : SDNode<"X86ISD::VPTERNLOG", SDTTernlog>;
 
 def X86VPerm2x128 : SDNode<"X86ISD::VPERM2X128", SDTShuff3OpI>;
 
Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
+++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h
@@ -29,7 +29,8 @@
   INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM,
   COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM,
   TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32,
-  EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC
+  EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC,
+  TERLOG_OP_MASK, TERLOG_OP_MASKZ
 };
 
 struct IntrinsicData {
@@ -1145,6 +1146,18 @@
   X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
   X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pternlog_d_128, TERLOG_OP_MASK,
+                     X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pternlog_d_256, TERLOG_OP_MASK,
+                     X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pternlog_d_512, TERLOG_OP_MASK,
+                     X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pternlog_q_128, TERLOG_OP_MASK,
+                     X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pternlog_q_256, TERLOG_OP_MASK,
+                     X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_mask_pternlog_q_512, TERLOG_OP_MASK,
+                     X86ISD::VPTERNLOG, 0),
   X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_128, INTR_TYPE_2OP_MASK,
                      X86ISD::UNPCKH, 0),
   X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_256, INTR_TYPE_2OP_MASK,
@@ -1489,7 +1502,18 @@
   X86_INTRINSIC_DATA(avx512_mask_xor_ps_128, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_xor_ps_256, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
   X86_INTRINSIC_DATA(avx512_mask_xor_ps_512, INTR_TYPE_2OP_MASK, X86ISD::FXOR, 0),
-
+  X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_128, TERLOG_OP_MASKZ,
+                     X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_256, TERLOG_OP_MASKZ,
+                     X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_pternlog_d_512, TERLOG_OP_MASKZ,
+                     X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_128, TERLOG_OP_MASKZ,
+                     X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_256, TERLOG_OP_MASKZ,
+                     X86ISD::VPTERNLOG, 0),
+  X86_INTRINSIC_DATA(avx512_maskz_pternlog_q_512, TERLOG_OP_MASKZ,
+                     X86ISD::VPTERNLOG, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_128, FMA_OP_MASKZ, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_256, FMA_OP_MASKZ, X86ISD::FMADD, 0),
   X86_INTRINSIC_DATA(avx512_maskz_vfmadd_pd_512, FMA_OP_MASKZ, X86ISD::FMADD,
Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll
+++
llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll @@ -4541,3 +4541,74 @@ %res2 = fadd <4 x float> %res, %res1 ret <4 x float> %res2 } + +declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16) + +define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16) + +define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm3 {%k1} {z} +; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4) + %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8) + +define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} +; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1) + %res2 = add <8 x i64> %res, %res1 + ret <8 x i64> %res2 +} + +declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8) + +define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm3 {%k1} {z} +; CHECK-NEXT: vpternlogq $33, %zmm2, %zmm1, %zmm0 +; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4) + %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1) + %res2 = add <8 x 
i64> %res, %res1 + ret <8 x i64> %res2 +} + Index: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -5022,3 +5022,147 @@ %res4 = add <8 x i32> %res2, %res3 ret <8 x i32> %res4 } + +declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8) + +define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1) + %res2 = add <4 x i32> %res, %res1 + ret <4 x i32> %res2 +} + +declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8) + +define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm3 {%k1} {z} +; CHECK-NEXT: vpternlogd $33, %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vpaddd %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4) + %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1) + %res2 = add <4 x i32> %res, %res1 + ret <4 x i32> %res2 +} + +declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8) + +define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1) + %res2 = add <8 x i32> %res, %res1 + ret <8 x i32> %res2 +} + +declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8) + +define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_d_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm3 {%k1} {z} +; CHECK-NEXT: vpternlogd $33, %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x 
i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4) + %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1) + %res2 = add <8 x i32> %res, %res1 + ret <8 x i32> %res2 +} + +declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8) + +define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} +; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4) + %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1) + %res2 = add <2 x i64> %res, %res1 + ret <2 x i64> %res2 +} + +declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8) + +define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm3 {%k1} {z} +; CHECK-NEXT: vpternlogq $33, %xmm2, %xmm1, %xmm0 +; CHECK-NEXT: vpaddq %xmm0, %xmm3, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4) + %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1) + %res2 = add <2 x i64> %res, %res1 + ret <2 x i64> %res2 +} + +declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8) + +define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_pternlog_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} +; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4) + %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1) + %res2 = add <4 x i64> %res, %res1 + ret <4 x i64> %res2 +} + +declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8) + +define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_maskz_pternlog_q_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vmovaps %zmm0, %zmm3 +; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm3 {%k1} {z} +; CHECK-NEXT: vpternlogq $33, %ymm2, %ymm1, %ymm0 +; CHECK-NEXT: vpaddq %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, 
<4 x i64> %x2, i32 33, i8 %x4) + %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1) + %res2 = add <4 x i64> %res, %res1 + ret <4 x i64> %res2 +} Index: llvm/trunk/test/MC/X86/avx512-encodings.s =================================================================== --- llvm/trunk/test/MC/X86/avx512-encodings.s +++ llvm/trunk/test/MC/X86/avx512-encodings.s @@ -17612,3 +17612,124 @@ // CHECK: vrcp14ss -516(%rdx), %xmm8, %xmm8 // CHECK: encoding: [0x62,0x72,0x3d,0x08,0x4d,0x82,0xfc,0xfd,0xff,0xff] vrcp14ss -516(%rdx), %xmm8, %xmm8 + +// CHECK: vpternlogd $171, %zmm20, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x33,0x0d,0x48,0x25,0xe4,0xab] + vpternlogd $0xab, %zmm20, %zmm14, %zmm12 + +// CHECK: vpternlogd $171, %zmm20, %zmm14, %zmm12 {%k7} +// CHECK: encoding: [0x62,0x33,0x0d,0x4f,0x25,0xe4,0xab] + vpternlogd $0xab, %zmm20, %zmm14, %zmm12 {%k7} + +// CHECK: vpternlogd $171, %zmm20, %zmm14, %zmm12 {%k7} {z} +// CHECK: encoding: [0x62,0x33,0x0d,0xcf,0x25,0xe4,0xab] + vpternlogd $0xab, %zmm20, %zmm14, %zmm12 {%k7} {z} + +// CHECK: vpternlogd $123, %zmm20, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x33,0x0d,0x48,0x25,0xe4,0x7b] + vpternlogd $0x7b, %zmm20, %zmm14, %zmm12 + +// CHECK: vpternlogd $123, (%rcx), %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0x21,0x7b] + vpternlogd $0x7b, (%rcx), %zmm14, %zmm12 + +// CHECK: vpternlogd $123, 291(%rax,%r14,8), %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x33,0x0d,0x48,0x25,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpternlogd $0x7b, 291(%rax,%r14,8), %zmm14, %zmm12 + +// CHECK: vpternlogd $123, (%rcx){1to16}, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0x21,0x7b] + vpternlogd $0x7b, (%rcx){1to16}, %zmm14, %zmm12 + +// CHECK: vpternlogd $123, 8128(%rdx), %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0x62,0x7f,0x7b] + vpternlogd $0x7b, 8128(%rdx), %zmm14, %zmm12 + +// CHECK: vpternlogd $123, 8192(%rdx), %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0xa2,0x00,0x20,0x00,0x00,0x7b] + vpternlogd $0x7b, 8192(%rdx), %zmm14, %zmm12 + +// CHECK: vpternlogd $123, -8192(%rdx), %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0x62,0x80,0x7b] + vpternlogd $0x7b, -8192(%rdx), %zmm14, %zmm12 + +// CHECK: vpternlogd $123, -8256(%rdx), %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x73,0x0d,0x48,0x25,0xa2,0xc0,0xdf,0xff,0xff,0x7b] + vpternlogd $0x7b, -8256(%rdx), %zmm14, %zmm12 + +// CHECK: vpternlogd $123, 508(%rdx){1to16}, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0x62,0x7f,0x7b] + vpternlogd $0x7b, 508(%rdx){1to16}, %zmm14, %zmm12 + +// CHECK: vpternlogd $123, 512(%rdx){1to16}, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0xa2,0x00,0x02,0x00,0x00,0x7b] + vpternlogd $0x7b, 512(%rdx){1to16}, %zmm14, %zmm12 + +// CHECK: vpternlogd $123, -512(%rdx){1to16}, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0x62,0x80,0x7b] + vpternlogd $0x7b, -512(%rdx){1to16}, %zmm14, %zmm12 + +// CHECK: vpternlogd $123, -516(%rdx){1to16}, %zmm14, %zmm12 +// CHECK: encoding: [0x62,0x73,0x0d,0x58,0x25,0xa2,0xfc,0xfd,0xff,0xff,0x7b] + vpternlogd $0x7b, -516(%rdx){1to16}, %zmm14, %zmm12 + +// CHECK: vpternlogq $171, %zmm21, %zmm2, %zmm15 +// CHECK: encoding: [0x62,0x33,0xed,0x48,0x25,0xfd,0xab] + vpternlogq $0xab, %zmm21, %zmm2, %zmm15 + +// CHECK: vpternlogq $171, %zmm21, %zmm2, %zmm15 {%k3} +// CHECK: encoding: [0x62,0x33,0xed,0x4b,0x25,0xfd,0xab] + vpternlogq $0xab, %zmm21, %zmm2, %zmm15 {%k3} + +// CHECK: 
vpternlogq $171, %zmm21, %zmm2, %zmm15 {%k3} {z} +// CHECK: encoding: [0x62,0x33,0xed,0xcb,0x25,0xfd,0xab] + vpternlogq $0xab, %zmm21, %zmm2, %zmm15 {%k3} {z} + +// CHECK: vpternlogq $123, %zmm21, %zmm2, %zmm15 +// CHECK: encoding: [0x62,0x33,0xed,0x48,0x25,0xfd,0x7b] + vpternlogq $0x7b, %zmm21, %zmm2, %zmm15 + +// CHECK: vpternlogq $123, (%rcx), %zmm2, %zmm15 +// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0x39,0x7b] + vpternlogq $0x7b, (%rcx), %zmm2, %zmm15 + +// CHECK: vpternlogq $123, 291(%rax,%r14,8), %zmm2, %zmm15 +// CHECK: encoding: [0x62,0x33,0xed,0x48,0x25,0xbc,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpternlogq $0x7b, 291(%rax,%r14,8), %zmm2, %zmm15 + +// CHECK: vpternlogq $123, (%rcx){1to8}, %zmm2, %zmm15 +// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0x39,0x7b] + vpternlogq $0x7b, (%rcx){1to8}, %zmm2, %zmm15 + +// CHECK: vpternlogq $123, 8128(%rdx), %zmm2, %zmm15 +// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0x7a,0x7f,0x7b] + vpternlogq $0x7b, 8128(%rdx), %zmm2, %zmm15 + +// CHECK: vpternlogq $123, 8192(%rdx), %zmm2, %zmm15 +// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0xba,0x00,0x20,0x00,0x00,0x7b] + vpternlogq $0x7b, 8192(%rdx), %zmm2, %zmm15 + +// CHECK: vpternlogq $123, -8192(%rdx), %zmm2, %zmm15 +// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0x7a,0x80,0x7b] + vpternlogq $0x7b, -8192(%rdx), %zmm2, %zmm15 + +// CHECK: vpternlogq $123, -8256(%rdx), %zmm2, %zmm15 +// CHECK: encoding: [0x62,0x73,0xed,0x48,0x25,0xba,0xc0,0xdf,0xff,0xff,0x7b] + vpternlogq $0x7b, -8256(%rdx), %zmm2, %zmm15 + +// CHECK: vpternlogq $123, 1016(%rdx){1to8}, %zmm2, %zmm15 +// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0x7a,0x7f,0x7b] + vpternlogq $0x7b, 1016(%rdx){1to8}, %zmm2, %zmm15 + +// CHECK: vpternlogq $123, 1024(%rdx){1to8}, %zmm2, %zmm15 +// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0xba,0x00,0x04,0x00,0x00,0x7b] + vpternlogq $0x7b, 1024(%rdx){1to8}, %zmm2, %zmm15 + +// CHECK: vpternlogq $123, -1024(%rdx){1to8}, %zmm2, %zmm15 +// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0x7a,0x80,0x7b] + vpternlogq $0x7b, -1024(%rdx){1to8}, %zmm2, %zmm15 + +// CHECK: vpternlogq $123, -1032(%rdx){1to8}, %zmm2, %zmm15 +// CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0xba,0xf8,0xfb,0xff,0xff,0x7b] + vpternlogq $0x7b, -1032(%rdx){1to8}, %zmm2, %zmm15 + Index: llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s =================================================================== --- llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s +++ llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s @@ -21322,3 +21322,244 @@ // CHECK: vcvttpd2udq -1032(%rdx){1to4}, %xmm28 // CHECK: encoding: [0x62,0x61,0xfc,0x38,0x78,0xa2,0xf8,0xfb,0xff,0xff] vcvttpd2udq -1032(%rdx){1to4}, %xmm28 + +// CHECK: vpternlogd $171, %xmm25, %xmm19, %xmm27 +// CHECK: encoding: [0x62,0x03,0x65,0x00,0x25,0xd9,0xab] + vpternlogd $0xab, %xmm25, %xmm19, %xmm27 + +// CHECK: vpternlogd $171, %xmm25, %xmm19, %xmm27 {%k7} +// CHECK: encoding: [0x62,0x03,0x65,0x07,0x25,0xd9,0xab] + vpternlogd $0xab, %xmm25, %xmm19, %xmm27 {%k7} + +// CHECK: vpternlogd $171, %xmm25, %xmm19, %xmm27 {%k7} {z} +// CHECK: encoding: [0x62,0x03,0x65,0x87,0x25,0xd9,0xab] + vpternlogd $0xab, %xmm25, %xmm19, %xmm27 {%k7} {z} + +// CHECK: vpternlogd $123, %xmm25, %xmm19, %xmm27 +// CHECK: encoding: [0x62,0x03,0x65,0x00,0x25,0xd9,0x7b] + vpternlogd $0x7b, %xmm25, %xmm19, %xmm27 + +// CHECK: vpternlogd $123, (%rcx), %xmm19, %xmm27 +// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x19,0x7b] + vpternlogd $0x7b, (%rcx), %xmm19, %xmm27 + +// CHECK: vpternlogd $123, 291(%rax,%r14,8), %xmm19, %xmm27 +// CHECK: encoding: 
[0x62,0x23,0x65,0x00,0x25,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpternlogd $0x7b, 291(%rax,%r14,8), %xmm19, %xmm27 + +// CHECK: vpternlogd $123, (%rcx){1to4}, %xmm19, %xmm27 +// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x19,0x7b] + vpternlogd $0x7b, (%rcx){1to4}, %xmm19, %xmm27 + +// CHECK: vpternlogd $123, 2032(%rdx), %xmm19, %xmm27 +// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x5a,0x7f,0x7b] + vpternlogd $0x7b, 2032(%rdx), %xmm19, %xmm27 + +// CHECK: vpternlogd $123, 2048(%rdx), %xmm19, %xmm27 +// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x9a,0x00,0x08,0x00,0x00,0x7b] + vpternlogd $0x7b, 2048(%rdx), %xmm19, %xmm27 + +// CHECK: vpternlogd $123, -2048(%rdx), %xmm19, %xmm27 +// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x5a,0x80,0x7b] + vpternlogd $0x7b, -2048(%rdx), %xmm19, %xmm27 + +// CHECK: vpternlogd $123, -2064(%rdx), %xmm19, %xmm27 +// CHECK: encoding: [0x62,0x63,0x65,0x00,0x25,0x9a,0xf0,0xf7,0xff,0xff,0x7b] + vpternlogd $0x7b, -2064(%rdx), %xmm19, %xmm27 + +// CHECK: vpternlogd $123, 508(%rdx){1to4}, %xmm19, %xmm27 +// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x5a,0x7f,0x7b] + vpternlogd $0x7b, 508(%rdx){1to4}, %xmm19, %xmm27 + +// CHECK: vpternlogd $123, 512(%rdx){1to4}, %xmm19, %xmm27 +// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x9a,0x00,0x02,0x00,0x00,0x7b] + vpternlogd $0x7b, 512(%rdx){1to4}, %xmm19, %xmm27 + +// CHECK: vpternlogd $123, -512(%rdx){1to4}, %xmm19, %xmm27 +// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x5a,0x80,0x7b] + vpternlogd $0x7b, -512(%rdx){1to4}, %xmm19, %xmm27 + +// CHECK: vpternlogd $123, -516(%rdx){1to4}, %xmm19, %xmm27 +// CHECK: encoding: [0x62,0x63,0x65,0x10,0x25,0x9a,0xfc,0xfd,0xff,0xff,0x7b] + vpternlogd $0x7b, -516(%rdx){1to4}, %xmm19, %xmm27 + +// CHECK: vpternlogd $171, %ymm20, %ymm17, %ymm29 +// CHECK: encoding: [0x62,0x23,0x75,0x20,0x25,0xec,0xab] + vpternlogd $0xab, %ymm20, %ymm17, %ymm29 + +// CHECK: vpternlogd $171, %ymm20, %ymm17, %ymm29 {%k3} +// CHECK: encoding: [0x62,0x23,0x75,0x23,0x25,0xec,0xab] + vpternlogd $0xab, %ymm20, %ymm17, %ymm29 {%k3} + +// CHECK: vpternlogd $171, %ymm20, %ymm17, %ymm29 {%k3} {z} +// CHECK: encoding: [0x62,0x23,0x75,0xa3,0x25,0xec,0xab] + vpternlogd $0xab, %ymm20, %ymm17, %ymm29 {%k3} {z} + +// CHECK: vpternlogd $123, %ymm20, %ymm17, %ymm29 +// CHECK: encoding: [0x62,0x23,0x75,0x20,0x25,0xec,0x7b] + vpternlogd $0x7b, %ymm20, %ymm17, %ymm29 + +// CHECK: vpternlogd $123, (%rcx), %ymm17, %ymm29 +// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0x29,0x7b] + vpternlogd $0x7b, (%rcx), %ymm17, %ymm29 + +// CHECK: vpternlogd $123, 291(%rax,%r14,8), %ymm17, %ymm29 +// CHECK: encoding: [0x62,0x23,0x75,0x20,0x25,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpternlogd $0x7b, 291(%rax,%r14,8), %ymm17, %ymm29 + +// CHECK: vpternlogd $123, (%rcx){1to8}, %ymm17, %ymm29 +// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0x29,0x7b] + vpternlogd $0x7b, (%rcx){1to8}, %ymm17, %ymm29 + +// CHECK: vpternlogd $123, 4064(%rdx), %ymm17, %ymm29 +// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0x6a,0x7f,0x7b] + vpternlogd $0x7b, 4064(%rdx), %ymm17, %ymm29 + +// CHECK: vpternlogd $123, 4096(%rdx), %ymm17, %ymm29 +// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0xaa,0x00,0x10,0x00,0x00,0x7b] + vpternlogd $0x7b, 4096(%rdx), %ymm17, %ymm29 + +// CHECK: vpternlogd $123, -4096(%rdx), %ymm17, %ymm29 +// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0x6a,0x80,0x7b] + vpternlogd $0x7b, -4096(%rdx), %ymm17, %ymm29 + +// CHECK: vpternlogd $123, -4128(%rdx), %ymm17, %ymm29 +// CHECK: encoding: [0x62,0x63,0x75,0x20,0x25,0xaa,0xe0,0xef,0xff,0xff,0x7b] + 
vpternlogd $0x7b, -4128(%rdx), %ymm17, %ymm29 + +// CHECK: vpternlogd $123, 508(%rdx){1to8}, %ymm17, %ymm29 +// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0x6a,0x7f,0x7b] + vpternlogd $0x7b, 508(%rdx){1to8}, %ymm17, %ymm29 + +// CHECK: vpternlogd $123, 512(%rdx){1to8}, %ymm17, %ymm29 +// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0xaa,0x00,0x02,0x00,0x00,0x7b] + vpternlogd $0x7b, 512(%rdx){1to8}, %ymm17, %ymm29 + +// CHECK: vpternlogd $123, -512(%rdx){1to8}, %ymm17, %ymm29 +// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0x6a,0x80,0x7b] + vpternlogd $0x7b, -512(%rdx){1to8}, %ymm17, %ymm29 + +// CHECK: vpternlogd $123, -516(%rdx){1to8}, %ymm17, %ymm29 +// CHECK: encoding: [0x62,0x63,0x75,0x30,0x25,0xaa,0xfc,0xfd,0xff,0xff,0x7b] + vpternlogd $0x7b, -516(%rdx){1to8}, %ymm17, %ymm29 + +// CHECK: vpternlogq $171, %xmm22, %xmm25, %xmm17 +// CHECK: encoding: [0x62,0xa3,0xb5,0x00,0x25,0xce,0xab] + vpternlogq $0xab, %xmm22, %xmm25, %xmm17 + +// CHECK: vpternlogq $171, %xmm22, %xmm25, %xmm17 {%k1} +// CHECK: encoding: [0x62,0xa3,0xb5,0x01,0x25,0xce,0xab] + vpternlogq $0xab, %xmm22, %xmm25, %xmm17 {%k1} + +// CHECK: vpternlogq $171, %xmm22, %xmm25, %xmm17 {%k1} {z} +// CHECK: encoding: [0x62,0xa3,0xb5,0x81,0x25,0xce,0xab] + vpternlogq $0xab, %xmm22, %xmm25, %xmm17 {%k1} {z} + +// CHECK: vpternlogq $123, %xmm22, %xmm25, %xmm17 +// CHECK: encoding: [0x62,0xa3,0xb5,0x00,0x25,0xce,0x7b] + vpternlogq $0x7b, %xmm22, %xmm25, %xmm17 + +// CHECK: vpternlogq $123, (%rcx), %xmm25, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x09,0x7b] + vpternlogq $0x7b, (%rcx), %xmm25, %xmm17 + +// CHECK: vpternlogq $123, 291(%rax,%r14,8), %xmm25, %xmm17 +// CHECK: encoding: [0x62,0xa3,0xb5,0x00,0x25,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpternlogq $0x7b, 291(%rax,%r14,8), %xmm25, %xmm17 + +// CHECK: vpternlogq $123, (%rcx){1to2}, %xmm25, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x09,0x7b] + vpternlogq $0x7b, (%rcx){1to2}, %xmm25, %xmm17 + +// CHECK: vpternlogq $123, 2032(%rdx), %xmm25, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x4a,0x7f,0x7b] + vpternlogq $0x7b, 2032(%rdx), %xmm25, %xmm17 + +// CHECK: vpternlogq $123, 2048(%rdx), %xmm25, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x8a,0x00,0x08,0x00,0x00,0x7b] + vpternlogq $0x7b, 2048(%rdx), %xmm25, %xmm17 + +// CHECK: vpternlogq $123, -2048(%rdx), %xmm25, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x4a,0x80,0x7b] + vpternlogq $0x7b, -2048(%rdx), %xmm25, %xmm17 + +// CHECK: vpternlogq $123, -2064(%rdx), %xmm25, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xb5,0x00,0x25,0x8a,0xf0,0xf7,0xff,0xff,0x7b] + vpternlogq $0x7b, -2064(%rdx), %xmm25, %xmm17 + +// CHECK: vpternlogq $123, 1016(%rdx){1to2}, %xmm25, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x4a,0x7f,0x7b] + vpternlogq $0x7b, 1016(%rdx){1to2}, %xmm25, %xmm17 + +// CHECK: vpternlogq $123, 1024(%rdx){1to2}, %xmm25, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x8a,0x00,0x04,0x00,0x00,0x7b] + vpternlogq $0x7b, 1024(%rdx){1to2}, %xmm25, %xmm17 + +// CHECK: vpternlogq $123, -1024(%rdx){1to2}, %xmm25, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x4a,0x80,0x7b] + vpternlogq $0x7b, -1024(%rdx){1to2}, %xmm25, %xmm17 + +// CHECK: vpternlogq $123, -1032(%rdx){1to2}, %xmm25, %xmm17 +// CHECK: encoding: [0x62,0xe3,0xb5,0x10,0x25,0x8a,0xf8,0xfb,0xff,0xff,0x7b] + vpternlogq $0x7b, -1032(%rdx){1to2}, %xmm25, %xmm17 + +// CHECK: vpternlogq $171, %ymm25, %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x03,0xc5,0x20,0x25,0xd1,0xab] + vpternlogq $0xab, %ymm25, %ymm23, 
%ymm26 + +// CHECK: vpternlogq $171, %ymm25, %ymm23, %ymm26 {%k6} +// CHECK: encoding: [0x62,0x03,0xc5,0x26,0x25,0xd1,0xab] + vpternlogq $0xab, %ymm25, %ymm23, %ymm26 {%k6} + +// CHECK: vpternlogq $171, %ymm25, %ymm23, %ymm26 {%k6} {z} +// CHECK: encoding: [0x62,0x03,0xc5,0xa6,0x25,0xd1,0xab] + vpternlogq $0xab, %ymm25, %ymm23, %ymm26 {%k6} {z} + +// CHECK: vpternlogq $123, %ymm25, %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x03,0xc5,0x20,0x25,0xd1,0x7b] + vpternlogq $0x7b, %ymm25, %ymm23, %ymm26 + +// CHECK: vpternlogq $123, (%rcx), %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x11,0x7b] + vpternlogq $0x7b, (%rcx), %ymm23, %ymm26 + +// CHECK: vpternlogq $123, 291(%rax,%r14,8), %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x23,0xc5,0x20,0x25,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b] + vpternlogq $0x7b, 291(%rax,%r14,8), %ymm23, %ymm26 + +// CHECK: vpternlogq $123, (%rcx){1to4}, %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x11,0x7b] + vpternlogq $0x7b, (%rcx){1to4}, %ymm23, %ymm26 + +// CHECK: vpternlogq $123, 4064(%rdx), %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x52,0x7f,0x7b] + vpternlogq $0x7b, 4064(%rdx), %ymm23, %ymm26 + +// CHECK: vpternlogq $123, 4096(%rdx), %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x92,0x00,0x10,0x00,0x00,0x7b] + vpternlogq $0x7b, 4096(%rdx), %ymm23, %ymm26 + +// CHECK: vpternlogq $123, -4096(%rdx), %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x52,0x80,0x7b] + vpternlogq $0x7b, -4096(%rdx), %ymm23, %ymm26 + +// CHECK: vpternlogq $123, -4128(%rdx), %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x25,0x92,0xe0,0xef,0xff,0xff,0x7b] + vpternlogq $0x7b, -4128(%rdx), %ymm23, %ymm26 + +// CHECK: vpternlogq $123, 1016(%rdx){1to4}, %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x52,0x7f,0x7b] + vpternlogq $0x7b, 1016(%rdx){1to4}, %ymm23, %ymm26 + +// CHECK: vpternlogq $123, 1024(%rdx){1to4}, %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x92,0x00,0x04,0x00,0x00,0x7b] + vpternlogq $0x7b, 1024(%rdx){1to4}, %ymm23, %ymm26 + +// CHECK: vpternlogq $123, -1024(%rdx){1to4}, %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x52,0x80,0x7b] + vpternlogq $0x7b, -1024(%rdx){1to4}, %ymm23, %ymm26 + +// CHECK: vpternlogq $123, -1032(%rdx){1to4}, %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x92,0xf8,0xfb,0xff,0xff,0x7b] + vpternlogq $0x7b, -1032(%rdx){1to4}, %ymm23, %ymm26 +
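
For reference, the 8-bit immediate in these intrinsics and instructions is the truth table of the three-input boolean function: bit (a << 2 | b << 1 | c) of the immediate gives the result for element bits a, b, c taken from the first, second and third source operands. The LLVM IR below is a small illustrative sketch, modeled on the tests in this patch; the function name xor3_epi32 and the immediate 150 (0x96, which encodes A xor B xor C) are chosen for the example and are not part of the patch. With an all-ones mask, this should lower to a single unmasked vpternlogd, as in the CHECK lines above.

; Per-element a ^ b ^ c via the new intrinsic. Immediate 0x96 sets bit
; (a<<2 | b<<1 | c) exactly when an odd number of the three input bits are
; set, i.e. three-way XOR. The -1 mask keeps every lane.
declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)

define <16 x i32> @xor3_epi32(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c) {
  %r = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %c, i32 150, i16 -1)
  ret <16 x i32> %r
}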