Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -3751,6 +3751,225 @@ llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; } +// Unpack ops. +let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". + def int_x86_avx512_mask_unpckh_pd_128 : + GCCBuiltin<"__builtin_ia32_unpckhpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_pd_256 : + GCCBuiltin<"__builtin_ia32_unpckhpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_pd_512 : + GCCBuiltin<"__builtin_ia32_unpckhpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_ps_128 : + GCCBuiltin<"__builtin_ia32_unpckhps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_ps_256 : + GCCBuiltin<"__builtin_ia32_unpckhps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckh_ps_512 : + GCCBuiltin<"__builtin_ia32_unpckhps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_pd_128 : + GCCBuiltin<"__builtin_ia32_unpcklpd128_mask">, + Intrinsic<[llvm_v2f64_ty], + [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_pd_256 : + GCCBuiltin<"__builtin_ia32_unpcklpd256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v4f64_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_pd_512 : + GCCBuiltin<"__builtin_ia32_unpcklpd512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_ps_128 : + GCCBuiltin<"__builtin_ia32_unpcklps128_mask">, + Intrinsic<[llvm_v4f32_ty], + [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_ps_256 : + GCCBuiltin<"__builtin_ia32_unpcklps256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v8f32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_unpckl_ps_512 : + GCCBuiltin<"__builtin_ia32_unpcklps512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhb_w_128 : + GCCBuiltin<"__builtin_ia32_punpckhbw128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhb_w_256 : + GCCBuiltin<"__builtin_ia32_punpckhbw256_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhb_w_512 : + GCCBuiltin<"__builtin_ia32_punpckhbw512_mask">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhd_q_128 : + GCCBuiltin<"__builtin_ia32_punpckhdq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhd_q_256 : + GCCBuiltin<"__builtin_ia32_punpckhdq256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhd_q_512 : + GCCBuiltin<"__builtin_ia32_punpckhdq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhqd_q_128 : + GCCBuiltin<"__builtin_ia32_punpckhqdq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhqd_q_256 : + GCCBuiltin<"__builtin_ia32_punpckhqdq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhqd_q_512 : + GCCBuiltin<"__builtin_ia32_punpckhqdq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhw_d_128 : + GCCBuiltin<"__builtin_ia32_punpckhwd128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhw_d_256 : + GCCBuiltin<"__builtin_ia32_punpckhwd256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckhw_d_512 : + GCCBuiltin<"__builtin_ia32_punpckhwd512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklb_w_128 : + GCCBuiltin<"__builtin_ia32_punpcklbw128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklb_w_256 : + GCCBuiltin<"__builtin_ia32_punpcklbw256_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v32i8_ty, llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklb_w_512 : + GCCBuiltin<"__builtin_ia32_punpcklbw512_mask">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v64i8_ty, llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckld_q_128 : + GCCBuiltin<"__builtin_ia32_punpckldq128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckld_q_256 : + GCCBuiltin<"__builtin_ia32_punpckldq256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v8i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpckld_q_512 : + GCCBuiltin<"__builtin_ia32_punpckldq512_mask">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v16i32_ty, llvm_v16i32_ty, llvm_v16i32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklqd_q_128 : + GCCBuiltin<"__builtin_ia32_punpcklqdq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklqd_q_256 : + GCCBuiltin<"__builtin_ia32_punpcklqdq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklqd_q_512 : + GCCBuiltin<"__builtin_ia32_punpcklqdq512_mask">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v8i64_ty, llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklw_d_128 : + GCCBuiltin<"__builtin_ia32_punpcklwd128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklw_d_256 : + GCCBuiltin<"__builtin_ia32_punpcklwd256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v16i16_ty, llvm_v16i16_ty, llvm_v16i16_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_punpcklw_d_512 : + GCCBuiltin<"__builtin_ia32_punpcklwd512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v32i16_ty, llvm_v32i16_ty, llvm_v32i16_ty, llvm_i32_ty], + [IntrNoMem]>; +} + // Vector convert let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_cvtdq2pd_128 : Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -2964,7 +2964,7 @@ X86VectorVTInfo _, OpndItins itins, bit IsCommutable = 0> { defm rr : AVX512_maskable, @@ -2972,7 +2972,7 @@ let mayLoad = 1 in defm rm : AVX512_maskable { let mayLoad = 1 in defm rmb : AVX512_maskable opc, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, - bit IsCommutable = 0> { - defm NAME : avx512_binop_rmb_vl, - VEX_W, EVEX_CD8<64, CD8VF>; + bit IsCommutable = 0, string suffix = "q"> { + defm NAME : avx512_binop_rmb_vl, + VEX_W, EVEX_CD8<64, CD8VF>; } multiclass avx512_binop_rm_vl_d opc, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, - bit IsCommutable = 0> { - defm NAME : avx512_binop_rmb_vl, EVEX_CD8<32, CD8VF>; + bit IsCommutable = 0, string suffix = "d"> { + defm NAME : avx512_binop_rmb_vl, + EVEX_CD8<32, CD8VF>; } multiclass avx512_binop_rm_vl_w opc, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, - bit IsCommutable = 0> { - defm NAME : avx512_binop_rm_vl, EVEX_CD8<16, CD8VF>; + bit IsCommutable = 0, string suffix = "w"> { + defm NAME : avx512_binop_rm_vl, + EVEX_CD8<16, CD8VF>; } multiclass avx512_binop_rm_vl_b opc, string OpcodeStr, SDNode OpNode, OpndItins itins, Predicate prd, - bit IsCommutable = 0> { - defm NAME : avx512_binop_rm_vl, EVEX_CD8<8, CD8VF>; + bit IsCommutable = 0, string suffix = "b"> { + defm NAME : avx512_binop_rm_vl, + EVEX_CD8<8, CD8VF>; } multiclass avx512_binop_rm_vl_dq opc_d, bits<8> opc_q, string OpcodeStr, @@ -3282,68 +3285,6 @@ SSE_INTALU_ITINS_P, HasBWI, 1>, T8PD; defm VPMINU : avx512_binop_rm_vl_dq<0x3B, 0x3B, "vpminu", umin, SSE_INTALU_ITINS_P, HasAVX512, 1>, T8PD; - -//===----------------------------------------------------------------------===// -// AVX-512 - Unpack Instructions -//===----------------------------------------------------------------------===// - -multiclass avx512_unpack_fp opc, SDNode OpNode, ValueType vt, - PatFrag mem_frag, RegisterClass RC, - X86MemOperand x86memop, string asm, - Domain d> { - def rr : AVX512PI, EVEX_4V; - def rm : AVX512PI, EVEX_4V; -} - -defm VUNPCKHPSZ: avx512_unpack_fp<0x15, X86Unpckh, v16f32, loadv8f64, - VR512, f512mem, "vunpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKHPDZ: avx512_unpack_fp<0x15, X86Unpckh, v8f64, loadv8f64, - VR512, f512mem, "vunpckhpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; -defm VUNPCKLPSZ: avx512_unpack_fp<0x14, X86Unpckl, v16f32, loadv8f64, - VR512, f512mem, "vunpcklps\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedSingle>, PS, EVEX_V512, EVEX_CD8<32, CD8VF>; -defm VUNPCKLPDZ: avx512_unpack_fp<0x14, X86Unpckl, v8f64, loadv8f64, - VR512, f512mem, "vunpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", - SSEPackedDouble>, PD, EVEX_V512, VEX_W, EVEX_CD8<64, CD8VF>; - -multiclass avx512_unpack_int opc, string OpcodeStr, SDNode OpNode, - ValueType OpVT, RegisterClass RC, PatFrag memop_frag, - X86MemOperand x86memop> { - def rr : AVX512BI, EVEX_4V; - def rm : AVX512BI, EVEX_4V; -} -defm VPUNPCKLDQZ : avx512_unpack_int<0x62, "vpunpckldq", X86Unpckl, v16i32, - VR512, loadv16i32, i512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPUNPCKLQDQZ : avx512_unpack_int<0x6C, "vpunpcklqdq", X86Unpckl, v8i64, - VR512, loadv8i64, i512mem>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; -defm VPUNPCKHDQZ : avx512_unpack_int<0x6A, "vpunpckhdq", X86Unpckh, v16i32, - VR512, loadv16i32, i512mem>, EVEX_V512, - EVEX_CD8<32, CD8VF>; -defm VPUNPCKHQDQZ : avx512_unpack_int<0x6D, "vpunpckhqdq", X86Unpckh, v8i64, - VR512, loadv8i64, i512mem>, EVEX_V512, - VEX_W, EVEX_CD8<64, CD8VF>; //===----------------------------------------------------------------------===// // AVX-512 Logical Instructions //===----------------------------------------------------------------------===// @@ -6812,3 +6753,27 @@ (bc_v8i64 (v8i1sextv8i64)), (bc_v8i64 (add (v8i64 VR512:$src), (v8i1sextv8i64)))), (VPABSQZrr VR512:$src)>; + +//===----------------------------------------------------------------------===// +// AVX-512 - Unpack Instructions +//===----------------------------------------------------------------------===// +defm VUNPCKH : avx512_fp_binop_p<0x15, "vunpckh", X86Unpckh>; +defm VUNPCKL : avx512_fp_binop_p<0x14, "vunpckl", X86Unpckl>; + +defm VPUNPCKLBW : avx512_binop_rm_vl_b<0x60, "vpunpcklbw", X86Unpckl, + SSE_INTALU_ITINS_P, HasBWI, 0, "">; +defm VPUNPCKHBW : avx512_binop_rm_vl_b<0x68, "vpunpckhbw", X86Unpckh, + SSE_INTALU_ITINS_P, HasBWI, 0, "">; +defm VPUNPCKLWD : avx512_binop_rm_vl_w<0x61, "vpunpcklwd", X86Unpckl, + SSE_INTALU_ITINS_P, HasBWI, 0, "">; +defm VPUNPCKHWD : avx512_binop_rm_vl_w<0x69, "vpunpckhwd", X86Unpckh, + SSE_INTALU_ITINS_P, HasBWI, 0, "">; + +defm VPUNPCKLDQ : avx512_binop_rm_vl_d<0x62, "vpunpckldq", X86Unpckl, + SSE_INTALU_ITINS_P, HasAVX512, 0, "">; +defm VPUNPCKHDQ : avx512_binop_rm_vl_d<0x6A, "vpunpckhdq", X86Unpckh, + SSE_INTALU_ITINS_P, HasAVX512, 0, "">; +defm VPUNPCKLQDQ : avx512_binop_rm_vl_q<0x6C, "vpunpcklqdq", X86Unpckl, + SSE_INTALU_ITINS_P, HasAVX512, 0, "">; +defm VPUNPCKHQDQ : avx512_binop_rm_vl_q<0x6D, "vpunpckhqdq", X86Unpckh, + SSE_INTALU_ITINS_P, HasAVX512, 0, "">; Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -2698,6 +2698,7 @@ Sched<[WriteFShuffleLd, ReadAfterLd]>; } +let Predicates = [HasAVX, NoVLX] in { defm VUNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, loadv4f32, VR128, f128mem, "unpckhps\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedSingle>, PS, VEX_4V; @@ -2723,7 +2724,7 @@ defm VUNPCKLPDY: sse12_unpack_interleave<0x14, X86Unpckl, v4f64, loadv4f64, VR256, f256mem, "unpcklpd\t{$src2, $src1, $dst|$dst, $src1, $src2}", SSEPackedDouble>, PD, VEX_4V, VEX_L; - +}// Predicates = [HasAVX, NoVLX] let Constraints = "$src1 = $dst" in { defm UNPCKHPS: sse12_unpack_interleave<0x15, X86Unpckh, v4f32, memopv4f32, VR128, f128mem, "unpckhps\t{$src2, $dst|$dst, $src2}", @@ -4507,40 +4508,43 @@ Sched<[WriteShuffleLd, ReadAfterLd]>; } -let Predicates = [HasAVX] in { + +let Predicates = [HasAVX, NoVLX_Or_NoBWI] in { defm VPUNPCKLBW : sse2_unpack<0x60, "vpunpcklbw", v16i8, X86Unpckl, bc_v16i8, loadv2i64, 0>, VEX_4V; defm VPUNPCKLWD : sse2_unpack<0x61, "vpunpcklwd", v8i16, X86Unpckl, bc_v8i16, loadv2i64, 0>, VEX_4V; - defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, - bc_v4i32, loadv2i64, 0>, VEX_4V; - defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, - bc_v2i64, loadv2i64, 0>, VEX_4V; - defm VPUNPCKHBW : sse2_unpack<0x68, "vpunpckhbw", v16i8, X86Unpckh, bc_v16i8, loadv2i64, 0>, VEX_4V; defm VPUNPCKHWD : sse2_unpack<0x69, "vpunpckhwd", v8i16, X86Unpckh, bc_v8i16, loadv2i64, 0>, VEX_4V; +} +let Predicates = [HasAVX, NoVLX] in { + defm VPUNPCKLDQ : sse2_unpack<0x62, "vpunpckldq", v4i32, X86Unpckl, + bc_v4i32, loadv2i64, 0>, VEX_4V; + defm VPUNPCKLQDQ : sse2_unpack<0x6C, "vpunpcklqdq", v2i64, X86Unpckl, + bc_v2i64, loadv2i64, 0>, VEX_4V; defm VPUNPCKHDQ : sse2_unpack<0x6A, "vpunpckhdq", v4i32, X86Unpckh, bc_v4i32, loadv2i64, 0>, VEX_4V; defm VPUNPCKHQDQ : sse2_unpack<0x6D, "vpunpckhqdq", v2i64, X86Unpckh, bc_v2i64, loadv2i64, 0>, VEX_4V; } -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoVLX_Or_NoBWI] in { defm VPUNPCKLBW : sse2_unpack_y<0x60, "vpunpcklbw", v32i8, X86Unpckl, bc_v32i8>, VEX_4V, VEX_L; defm VPUNPCKLWD : sse2_unpack_y<0x61, "vpunpcklwd", v16i16, X86Unpckl, bc_v16i16>, VEX_4V, VEX_L; - defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, - bc_v8i32>, VEX_4V, VEX_L; - defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, - bc_v4i64>, VEX_4V, VEX_L; - defm VPUNPCKHBW : sse2_unpack_y<0x68, "vpunpckhbw", v32i8, X86Unpckh, bc_v32i8>, VEX_4V, VEX_L; defm VPUNPCKHWD : sse2_unpack_y<0x69, "vpunpckhwd", v16i16, X86Unpckh, bc_v16i16>, VEX_4V, VEX_L; +} +let Predicates = [HasAVX2, NoVLX] in { + defm VPUNPCKLDQ : sse2_unpack_y<0x62, "vpunpckldq", v8i32, X86Unpckl, + bc_v8i32>, VEX_4V, VEX_L; + defm VPUNPCKLQDQ : sse2_unpack_y<0x6C, "vpunpcklqdq", v4i64, X86Unpckl, + bc_v4i64>, VEX_4V, VEX_L; defm VPUNPCKHDQ : sse2_unpack_y<0x6A, "vpunpckhdq", v8i32, X86Unpckh, bc_v8i32>, VEX_4V, VEX_L; defm VPUNPCKHQDQ : sse2_unpack_y<0x6D, "vpunpckhqdq", v4i64, X86Unpckh, Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -1043,6 +1043,54 @@ X86_INTRINSIC_DATA(avx512_mask_psubus_w_128, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx512_mask_psubus_w_256, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0), X86_INTRINSIC_DATA(avx512_mask_psubus_w_512, INTR_TYPE_2OP_MASK, X86ISD::SUBUS, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_128, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_256, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckhb_w_512, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_128, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_256, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckhd_q_512, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_128, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_256, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckhqd_q_512, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_128, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_256, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckhw_d_512, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_128, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_256, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_punpcklb_w_512, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckld_q_128, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckld_q_256, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_punpckld_q_512, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_128, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_256, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_punpcklqd_q_512, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_128, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_256, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_punpcklw_d_512, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), X86_INTRINSIC_DATA(avx512_mask_pxor_d_128, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_pxor_d_256, INTR_TYPE_2OP_MASK, ISD::XOR, 0), X86_INTRINSIC_DATA(avx512_mask_pxor_d_512, INTR_TYPE_2OP_MASK, ISD::XOR, 0), @@ -1123,6 +1171,30 @@ X86_INTRINSIC_DATA(avx512_mask_ucmp_w_128, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_w_256, CMP_MASK_CC, X86ISD::CMPMU, 0), X86_INTRINSIC_DATA(avx512_mask_ucmp_w_512, CMP_MASK_CC, X86ISD::CMPMU, 0), + X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_128, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_256, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_unpckh_pd_512, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_128, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_256, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_unpckh_ps_512, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKH, 0), + X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_128, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_256, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_unpckl_pd_512, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_128, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_256, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), + X86_INTRINSIC_DATA(avx512_mask_unpckl_ps_512, INTR_TYPE_2OP_MASK, + X86ISD::UNPCKL, 0), X86_INTRINSIC_DATA(avx512_mask_valign_d_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0), X86_INTRINSIC_DATA(avx512_mask_valign_q_512, INTR_TYPE_3OP_MASK, X86ISD::VALIGN, 0), Index: test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -3119,6 +3119,134 @@ ret <16 x float> %res2 } +declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vunpckhpd %zmm1, %zmm0, %zmm0 + %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) + %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) + +define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vunpckhps %zmm1, %zmm0, %zmm0 + %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) + %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vunpcklpd %zmm1, %zmm0, %zmm0 + %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) + %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1) + %res2 = fadd <8 x double> %res, %res1 + ret <8 x double> %res2 +} + +declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16) + +define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vunpcklps %zmm1, %zmm0, %zmm0 + %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) + %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1) + %res2 = fadd <16 x float> %res, %res1 + ret <16 x float> %res2 +} + +declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vpunpcklqdq %zmm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vpunpcklqdq {{.*#+}} +; CHECK: vpaddq %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) + %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer,i8 %x3) + %res3 = add <8 x i64> %res, %res1 + %res4 = add <8 x i64> %res2, %res3 + ret <8 x i64> %res4 +} + +declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) + +define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vpunpckhqdq {{.*#+}} +; CHECK: vpaddq %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) + %res2 = add <8 x i64> %res, %res1 + ret <8 x i64> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vpunpckhdq {{.*#+}} +; CHECK: vpaddd %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + +declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpunpckldq %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vpunpckldq {{.*#+}} +; CHECK: vpaddd %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1) + %res2 = add <16 x i32> %res, %res1 + ret <16 x i32> %res2 +} + declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.512(<8 x i64>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_512(<8 x i64> %x0, <16 x i8> %x1, i8 %x2) { Index: test/CodeGen/X86/avx512bw-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512bw-intrinsics.ll +++ test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1118,3 +1118,66 @@ ret <16 x i32> %res2 } +declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovq %rdi, %k1 +; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vpunpckhbw %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq + %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + %res2 = add <64 x i8> %res, %res1 + ret <64 x i8> %res2 +} + +declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) + +define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovq %rdi, %k1 +; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vpunpcklbw %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq + %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) + %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) + %res2 = add <64 x i8> %res, %res1 + ret <64 x i8> %res2 +} + +declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vpunpckhwd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq + %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + %res2 = add <32 x i16> %res, %res1 + ret <32 x i16> %res2 +} + +declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32) + +define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vpunpcklwd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq + %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) + %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1) + %res2 = add <32 x i16> %res, %res1 + ret <32 x i16> %res2 +} \ No newline at end of file Index: test/CodeGen/X86/avx512bwvl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -4098,3 +4098,99 @@ %res2 = add <16 x i16> %res, %res1 ret <16 x i16> %res2 } + +declare <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_128: +; CHECK: vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vpunpckhbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x68,0xc1] + %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) + %res2 = add <16 x i8> %res, %res1 + ret <16 x i8> %res2 +} + +declare <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_128: +; CHECK: vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vpunpcklbw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x60,0xc1] + %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) + %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) + %res2 = add <16 x i8> %res, %res1 + ret <16 x i8> %res2 +} + +declare <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) + +define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckhb_w_256: +; CHECK: vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vpunpckhbw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x68,0xc1] + %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) + %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) + %res2 = add <32 x i8> %res, %res1 + ret <32 x i8> %res2 +} + +declare <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) + +define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpcklb_w_256: +; CHECK: vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vpunpcklbw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x60,0xc1] + %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) + %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) + %res2 = add <32 x i8> %res, %res1 + ret <32 x i8> %res2 +} + +declare <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_128: +; CHECK: vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vpunpcklwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x61,0xc1] + %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) + %res2 = add <8 x i16> %res, %res1 + ret <8 x i16> %res2 +} + +declare <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_128: +; CHECK: vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vpunpckhwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x69,0xc1] + %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) + %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1) + %res2 = add <8 x i16> %res, %res1 + ret <8 x i16> %res2 +} + +declare <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) + +define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpcklw_d_256: +; CHECK: vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vpunpcklwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x61,0xc1] + %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) + %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) + %res2 = add <16 x i16> %res, %res1 + ret <16 x i16> %res2 +} + +declare <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16) + +define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckhw_d_256: +; CHECK: vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vpunpckhwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x69,0xc1] + %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) + %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1) + %res2 = add <16 x i16> %res, %res1 + ret <16 x i16> %res2 +} Index: test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics.ll +++ test/CodeGen/X86/avx512vl-intrinsics.ll @@ -3005,6 +3005,200 @@ ret <8 x float> %res2 } +declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128: +; CHECK: vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vunpckhpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xc1] + %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_256: +; CHECK: vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vunpckhpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x15,0xc1] + %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_128: +; CHECK: vunpckhps %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vunpckhps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x15,0xc1] + %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256: +; CHECK: ## BB#0: +; CHECK: vunpckhps %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vunpckhps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xc1] + %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) + +define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_128: +; CHECK: vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vunpcklpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x14,0xc1] + %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) + %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1) + %res2 = fadd <2 x double> %res, %res1 + ret <2 x double> %res2 +} + +declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_256: +; CHECK: vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vunpcklpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x14,0xc1] + %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) + %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1) + %res2 = fadd <4 x double> %res, %res1 + ret <4 x double> %res2 +} + +declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) + +define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_128: +; CHECK: vunpcklps %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vunpcklps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x14,0xc1] + %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) + %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1) + %res2 = fadd <4 x float> %res, %res1 + ret <4 x float> %res2 +} + +declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_256: +; CHECK: vunpcklps %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vunpcklps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x14,0xc1] + %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) + %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1) + %res2 = fadd <8 x float> %res, %res1 + ret <8 x float> %res2 +} + +declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_128: +; CHECK: vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vpunpckhdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6a,0xc1] + %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) + %res2 = add <4 x i32> %res, %res1 + ret <4 x i32> %res2 +} + +declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_128: +; CHECK: vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vpunpckldq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x62,0xc1] + %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) + %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1) + %res2 = add <4 x i32> %res, %res1 + ret <4 x i32> %res2 +} + +declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_256: +; CHECK: ## BB#0: +; CHECK: vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vpunpckhdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6a,0xc1] + %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) + %res2 = add <8 x i32> %res, %res1 + ret <8 x i32> %res2 +} + +declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_256: +; CHECK: vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vpunpckldq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x62,0xc1] + %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1) + %res2 = add <8 x i32> %res, %res1 + ret <8 x i32> %res2 +} + +declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) + +define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128: +; CHECK: vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vpunpckhqdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6d,0xc1] + %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) + %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) + %res2 = add <2 x i64> %res, %res1 + ret <2 x i64> %res2 +} + +declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) + +define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128: +; CHECK: vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} +; CHECK-NEXT: vpunpcklqdq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6c,0xc1] + %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) + %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) + %res2 = add <2 x i64> %res, %res1 + ret <2 x i64> %res2 +} + +declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) + +define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256: +; CHECK: vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vpunpcklqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6c,0xc1] + %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) + %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) + %res2 = add <4 x i64> %res, %res1 + ret <4 x i64> %res2 +} + +declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) + +define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { +; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256: +; CHECK: vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vpunpckhqdq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6d,0xc1] + %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) + %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) + %res2 = add <4 x i64> %res, %res1 + ret <4 x i64> %res2 +} + declare <16 x i8> @llvm.x86.avx512.mask.pmov.qb.128(<2 x i64>, <16 x i8>, i8) define <16 x i8>@test_int_x86_avx512_mask_pmov_qb_128(<2 x i64> %x0, <16 x i8> %x1, i8 %x2) { Index: test/MC/X86/avx512-encodings.s =================================================================== --- test/MC/X86/avx512-encodings.s +++ test/MC/X86/avx512-encodings.s @@ -14430,3 +14430,451 @@ // CHECK: encoding: [0x62,0xf2,0xdd,0x00,0xbf,0xb2,0xf8,0xfb,0xff,0xff] vfnmsub231sd -1032(%rdx), %xmm20, %xmm6 +// CHECK: vunpckhps %zmm16, %zmm14, %zmm5 +// CHECK: encoding: [0x62,0xb1,0x0c,0x48,0x15,0xe8] + vunpckhps %zmm16, %zmm14, %zmm5 + +// CHECK: vunpckhps %zmm16, %zmm14, %zmm5 {%k6} +// CHECK: encoding: [0x62,0xb1,0x0c,0x4e,0x15,0xe8] + vunpckhps %zmm16, %zmm14, %zmm5 {%k6} + +// CHECK: vunpckhps %zmm16, %zmm14, %zmm5 {%k6} {z} +// CHECK: encoding: [0x62,0xb1,0x0c,0xce,0x15,0xe8] + vunpckhps %zmm16, %zmm14, %zmm5 {%k6} {z} + +// CHECK: vunpckhps (%rcx), %zmm14, %zmm5 +// CHECK: encoding: [0x62,0xf1,0x0c,0x48,0x15,0x29] + vunpckhps (%rcx), %zmm14, %zmm5 + +// CHECK: vunpckhps 291(%rax,%r14,8), %zmm14, %zmm5 +// CHECK: encoding: [0x62,0xb1,0x0c,0x48,0x15,0xac,0xf0,0x23,0x01,0x00,0x00] + vunpckhps 291(%rax,%r14,8), %zmm14, %zmm5 + +// CHECK: vunpckhps (%rcx){1to16}, %zmm14, %zmm5 +// CHECK: encoding: [0x62,0xf1,0x0c,0x58,0x15,0x29] + vunpckhps (%rcx){1to16}, %zmm14, %zmm5 + +// CHECK: vunpckhps 8128(%rdx), %zmm14, %zmm5 +// CHECK: encoding: [0x62,0xf1,0x0c,0x48,0x15,0x6a,0x7f] + vunpckhps 8128(%rdx), %zmm14, %zmm5 + +// CHECK: vunpckhps 8192(%rdx), %zmm14, %zmm5 +// CHECK: encoding: [0x62,0xf1,0x0c,0x48,0x15,0xaa,0x00,0x20,0x00,0x00] + vunpckhps 8192(%rdx), %zmm14, %zmm5 + +// CHECK: vunpckhps -8192(%rdx), %zmm14, %zmm5 +// CHECK: encoding: [0x62,0xf1,0x0c,0x48,0x15,0x6a,0x80] + vunpckhps -8192(%rdx), %zmm14, %zmm5 + +// CHECK: vunpckhps -8256(%rdx), %zmm14, %zmm5 +// CHECK: encoding: [0x62,0xf1,0x0c,0x48,0x15,0xaa,0xc0,0xdf,0xff,0xff] + vunpckhps -8256(%rdx), %zmm14, %zmm5 + +// CHECK: vunpckhps 508(%rdx){1to16}, %zmm14, %zmm5 +// CHECK: encoding: [0x62,0xf1,0x0c,0x58,0x15,0x6a,0x7f] + vunpckhps 508(%rdx){1to16}, %zmm14, %zmm5 + +// CHECK: vunpckhps 512(%rdx){1to16}, %zmm14, %zmm5 +// CHECK: encoding: [0x62,0xf1,0x0c,0x58,0x15,0xaa,0x00,0x02,0x00,0x00] + vunpckhps 512(%rdx){1to16}, %zmm14, %zmm5 + +// CHECK: vunpckhps -512(%rdx){1to16}, %zmm14, %zmm5 +// CHECK: encoding: [0x62,0xf1,0x0c,0x58,0x15,0x6a,0x80] + vunpckhps -512(%rdx){1to16}, %zmm14, %zmm5 + +// CHECK: vunpckhps -516(%rdx){1to16}, %zmm14, %zmm5 +// CHECK: encoding: [0x62,0xf1,0x0c,0x58,0x15,0xaa,0xfc,0xfd,0xff,0xff] + vunpckhps -516(%rdx){1to16}, %zmm14, %zmm5 + +// CHECK: vunpcklps %zmm2, %zmm3, %zmm1 +// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x14,0xca] + vunpcklps %zmm2, %zmm3, %zmm1 + +// CHECK: vunpcklps %zmm2, %zmm3, %zmm1 {%k3} +// CHECK: encoding: [0x62,0xf1,0x64,0x4b,0x14,0xca] + vunpcklps %zmm2, %zmm3, %zmm1 {%k3} + +// CHECK: vunpcklps %zmm2, %zmm3, %zmm1 {%k3} {z} +// CHECK: encoding: [0x62,0xf1,0x64,0xcb,0x14,0xca] + vunpcklps %zmm2, %zmm3, %zmm1 {%k3} {z} + +// CHECK: vunpcklps (%rcx), %zmm3, %zmm1 +// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x14,0x09] + vunpcklps (%rcx), %zmm3, %zmm1 + +// CHECK: vunpcklps 291(%rax,%r14,8), %zmm3, %zmm1 +// CHECK: encoding: [0x62,0xb1,0x64,0x48,0x14,0x8c,0xf0,0x23,0x01,0x00,0x00] + vunpcklps 291(%rax,%r14,8), %zmm3, %zmm1 + +// CHECK: vunpcklps (%rcx){1to16}, %zmm3, %zmm1 +// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x14,0x09] + vunpcklps (%rcx){1to16}, %zmm3, %zmm1 + +// CHECK: vunpcklps 8128(%rdx), %zmm3, %zmm1 +// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x14,0x4a,0x7f] + vunpcklps 8128(%rdx), %zmm3, %zmm1 + +// CHECK: vunpcklps 8192(%rdx), %zmm3, %zmm1 +// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x14,0x8a,0x00,0x20,0x00,0x00] + vunpcklps 8192(%rdx), %zmm3, %zmm1 + +// CHECK: vunpcklps -8192(%rdx), %zmm3, %zmm1 +// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x14,0x4a,0x80] + vunpcklps -8192(%rdx), %zmm3, %zmm1 + +// CHECK: vunpcklps -8256(%rdx), %zmm3, %zmm1 +// CHECK: encoding: [0x62,0xf1,0x64,0x48,0x14,0x8a,0xc0,0xdf,0xff,0xff] + vunpcklps -8256(%rdx), %zmm3, %zmm1 + +// CHECK: vunpcklps 508(%rdx){1to16}, %zmm3, %zmm1 +// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x14,0x4a,0x7f] + vunpcklps 508(%rdx){1to16}, %zmm3, %zmm1 + +// CHECK: vunpcklps 512(%rdx){1to16}, %zmm3, %zmm1 +// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x14,0x8a,0x00,0x02,0x00,0x00] + vunpcklps 512(%rdx){1to16}, %zmm3, %zmm1 + +// CHECK: vunpcklps -512(%rdx){1to16}, %zmm3, %zmm1 +// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x14,0x4a,0x80] + vunpcklps -512(%rdx){1to16}, %zmm3, %zmm1 + +// CHECK: vunpcklps -516(%rdx){1to16}, %zmm3, %zmm1 +// CHECK: encoding: [0x62,0xf1,0x64,0x58,0x14,0x8a,0xfc,0xfd,0xff,0xff] + vunpcklps -516(%rdx){1to16}, %zmm3, %zmm1 + +// CHECK: vunpckhpd %zmm26, %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x01,0xe5,0x40,0x15,0xca] + vunpckhpd %zmm26, %zmm19, %zmm25 + +// CHECK: vunpckhpd %zmm26, %zmm19, %zmm25 {%k5} +// CHECK: encoding: [0x62,0x01,0xe5,0x45,0x15,0xca] + vunpckhpd %zmm26, %zmm19, %zmm25 {%k5} + +// CHECK: vunpckhpd %zmm26, %zmm19, %zmm25 {%k5} {z} +// CHECK: encoding: [0x62,0x01,0xe5,0xc5,0x15,0xca] + vunpckhpd %zmm26, %zmm19, %zmm25 {%k5} {z} + +// CHECK: vunpckhpd (%rcx), %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x61,0xe5,0x40,0x15,0x09] + vunpckhpd (%rcx), %zmm19, %zmm25 + +// CHECK: vunpckhpd 291(%rax,%r14,8), %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x21,0xe5,0x40,0x15,0x8c,0xf0,0x23,0x01,0x00,0x00] + vunpckhpd 291(%rax,%r14,8), %zmm19, %zmm25 + +// CHECK: vunpckhpd (%rcx){1to8}, %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x61,0xe5,0x50,0x15,0x09] + vunpckhpd (%rcx){1to8}, %zmm19, %zmm25 + +// CHECK: vunpckhpd 8128(%rdx), %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x61,0xe5,0x40,0x15,0x4a,0x7f] + vunpckhpd 8128(%rdx), %zmm19, %zmm25 + +// CHECK: vunpckhpd 8192(%rdx), %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x61,0xe5,0x40,0x15,0x8a,0x00,0x20,0x00,0x00] + vunpckhpd 8192(%rdx), %zmm19, %zmm25 + +// CHECK: vunpckhpd -8192(%rdx), %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x61,0xe5,0x40,0x15,0x4a,0x80] + vunpckhpd -8192(%rdx), %zmm19, %zmm25 + +// CHECK: vunpckhpd -8256(%rdx), %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x61,0xe5,0x40,0x15,0x8a,0xc0,0xdf,0xff,0xff] + vunpckhpd -8256(%rdx), %zmm19, %zmm25 + +// CHECK: vunpckhpd 1016(%rdx){1to8}, %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x61,0xe5,0x50,0x15,0x4a,0x7f] + vunpckhpd 1016(%rdx){1to8}, %zmm19, %zmm25 + +// CHECK: vunpckhpd 1024(%rdx){1to8}, %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x61,0xe5,0x50,0x15,0x8a,0x00,0x04,0x00,0x00] + vunpckhpd 1024(%rdx){1to8}, %zmm19, %zmm25 + +// CHECK: vunpckhpd -1024(%rdx){1to8}, %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x61,0xe5,0x50,0x15,0x4a,0x80] + vunpckhpd -1024(%rdx){1to8}, %zmm19, %zmm25 + +// CHECK: vunpckhpd -1032(%rdx){1to8}, %zmm19, %zmm25 +// CHECK: encoding: [0x62,0x61,0xe5,0x50,0x15,0x8a,0xf8,0xfb,0xff,0xff] + vunpckhpd -1032(%rdx){1to8}, %zmm19, %zmm25 + +// CHECK: vunpcklpd %zmm21, %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xa1,0x95,0x40,0x14,0xd5] + vunpcklpd %zmm21, %zmm29, %zmm18 + +// CHECK: vunpcklpd %zmm21, %zmm29, %zmm18 {%k6} +// CHECK: encoding: [0x62,0xa1,0x95,0x46,0x14,0xd5] + vunpcklpd %zmm21, %zmm29, %zmm18 {%k6} + +// CHECK: vunpcklpd %zmm21, %zmm29, %zmm18 {%k6} {z} +// CHECK: encoding: [0x62,0xa1,0x95,0xc6,0x14,0xd5] + vunpcklpd %zmm21, %zmm29, %zmm18 {%k6} {z} + +// CHECK: vunpcklpd (%rcx), %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x14,0x11] + vunpcklpd (%rcx), %zmm29, %zmm18 + +// CHECK: vunpcklpd 291(%rax,%r14,8), %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xa1,0x95,0x40,0x14,0x94,0xf0,0x23,0x01,0x00,0x00] + vunpcklpd 291(%rax,%r14,8), %zmm29, %zmm18 + +// CHECK: vunpcklpd (%rcx){1to8}, %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x95,0x50,0x14,0x11] + vunpcklpd (%rcx){1to8}, %zmm29, %zmm18 + +// CHECK: vunpcklpd 8128(%rdx), %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x14,0x52,0x7f] + vunpcklpd 8128(%rdx), %zmm29, %zmm18 + +// CHECK: vunpcklpd 8192(%rdx), %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x14,0x92,0x00,0x20,0x00,0x00] + vunpcklpd 8192(%rdx), %zmm29, %zmm18 + +// CHECK: vunpcklpd -8192(%rdx), %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x14,0x52,0x80] + vunpcklpd -8192(%rdx), %zmm29, %zmm18 + +// CHECK: vunpcklpd -8256(%rdx), %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x95,0x40,0x14,0x92,0xc0,0xdf,0xff,0xff] + vunpcklpd -8256(%rdx), %zmm29, %zmm18 + +// CHECK: vunpcklpd 1016(%rdx){1to8}, %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x95,0x50,0x14,0x52,0x7f] + vunpcklpd 1016(%rdx){1to8}, %zmm29, %zmm18 + +// CHECK: vunpcklpd 1024(%rdx){1to8}, %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x95,0x50,0x14,0x92,0x00,0x04,0x00,0x00] + vunpcklpd 1024(%rdx){1to8}, %zmm29, %zmm18 + +// CHECK: vunpcklpd -1024(%rdx){1to8}, %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x95,0x50,0x14,0x52,0x80] + vunpcklpd -1024(%rdx){1to8}, %zmm29, %zmm18 + +// CHECK: vunpcklpd -1032(%rdx){1to8}, %zmm29, %zmm18 +// CHECK: encoding: [0x62,0xe1,0x95,0x50,0x14,0x92,0xf8,0xfb,0xff,0xff] + vunpcklpd -1032(%rdx){1to8}, %zmm29, %zmm18 + +// CHECK: vpunpckldq %zmm17, %zmm3, %zmm24 +// CHECK: encoding: [0x62,0x21,0x65,0x48,0x62,0xc1] + vpunpckldq %zmm17, %zmm3, %zmm24 + +// CHECK: vpunpckldq %zmm17, %zmm3, %zmm24 {%k3} +// CHECK: encoding: [0x62,0x21,0x65,0x4b,0x62,0xc1] + vpunpckldq %zmm17, %zmm3, %zmm24 {%k3} + +// CHECK: vpunpckldq %zmm17, %zmm3, %zmm24 {%k3} {z} +// CHECK: encoding: [0x62,0x21,0x65,0xcb,0x62,0xc1] + vpunpckldq %zmm17, %zmm3, %zmm24 {%k3} {z} + +// CHECK: vpunpckldq (%rcx), %zmm3, %zmm24 +// CHECK: encoding: [0x62,0x61,0x65,0x48,0x62,0x01] + vpunpckldq (%rcx), %zmm3, %zmm24 + +// CHECK: vpunpckldq 291(%rax,%r14,8), %zmm3, %zmm24 +// CHECK: encoding: [0x62,0x21,0x65,0x48,0x62,0x84,0xf0,0x23,0x01,0x00,0x00] + vpunpckldq 291(%rax,%r14,8), %zmm3, %zmm24 + +// CHECK: vpunpckldq (%rcx){1to16}, %zmm3, %zmm24 +// CHECK: encoding: [0x62,0x61,0x65,0x58,0x62,0x01] + vpunpckldq (%rcx){1to16}, %zmm3, %zmm24 + +// CHECK: vpunpckldq 8128(%rdx), %zmm3, %zmm24 +// CHECK: encoding: [0x62,0x61,0x65,0x48,0x62,0x42,0x7f] + vpunpckldq 8128(%rdx), %zmm3, %zmm24 + +// CHECK: vpunpckldq 8192(%rdx), %zmm3, %zmm24 +// CHECK: encoding: [0x62,0x61,0x65,0x48,0x62,0x82,0x00,0x20,0x00,0x00] + vpunpckldq 8192(%rdx), %zmm3, %zmm24 + +// CHECK: vpunpckldq -8192(%rdx), %zmm3, %zmm24 +// CHECK: encoding: [0x62,0x61,0x65,0x48,0x62,0x42,0x80] + vpunpckldq -8192(%rdx), %zmm3, %zmm24 + +// CHECK: vpunpckldq -8256(%rdx), %zmm3, %zmm24 +// CHECK: encoding: [0x62,0x61,0x65,0x48,0x62,0x82,0xc0,0xdf,0xff,0xff] + vpunpckldq -8256(%rdx), %zmm3, %zmm24 + +// CHECK: vpunpckldq 508(%rdx){1to16}, %zmm3, %zmm24 +// CHECK: encoding: [0x62,0x61,0x65,0x58,0x62,0x42,0x7f] + vpunpckldq 508(%rdx){1to16}, %zmm3, %zmm24 + +// CHECK: vpunpckldq 512(%rdx){1to16}, %zmm3, %zmm24 +// CHECK: encoding: [0x62,0x61,0x65,0x58,0x62,0x82,0x00,0x02,0x00,0x00] + vpunpckldq 512(%rdx){1to16}, %zmm3, %zmm24 + +// CHECK: vpunpckldq -512(%rdx){1to16}, %zmm3, %zmm24 +// CHECK: encoding: [0x62,0x61,0x65,0x58,0x62,0x42,0x80] + vpunpckldq -512(%rdx){1to16}, %zmm3, %zmm24 + +// CHECK: vpunpckldq -516(%rdx){1to16}, %zmm3, %zmm24 +// CHECK: encoding: [0x62,0x61,0x65,0x58,0x62,0x82,0xfc,0xfd,0xff,0xff] + vpunpckldq -516(%rdx){1to16}, %zmm3, %zmm24 + +// CHECK: vpunpckhdq %zmm13, %zmm4, %zmm6 +// CHECK: encoding: [0x62,0xd1,0x5d,0x48,0x6a,0xf5] + vpunpckhdq %zmm13, %zmm4, %zmm6 + +// CHECK: vpunpckhdq %zmm13, %zmm4, %zmm6 {%k5} +// CHECK: encoding: [0x62,0xd1,0x5d,0x4d,0x6a,0xf5] + vpunpckhdq %zmm13, %zmm4, %zmm6 {%k5} + +// CHECK: vpunpckhdq %zmm13, %zmm4, %zmm6 {%k5} {z} +// CHECK: encoding: [0x62,0xd1,0x5d,0xcd,0x6a,0xf5] + vpunpckhdq %zmm13, %zmm4, %zmm6 {%k5} {z} + +// CHECK: vpunpckhdq (%rcx), %zmm4, %zmm6 +// CHECK: encoding: [0x62,0xf1,0x5d,0x48,0x6a,0x31] + vpunpckhdq (%rcx), %zmm4, %zmm6 + +// CHECK: vpunpckhdq 291(%rax,%r14,8), %zmm4, %zmm6 +// CHECK: encoding: [0x62,0xb1,0x5d,0x48,0x6a,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpunpckhdq 291(%rax,%r14,8), %zmm4, %zmm6 + +// CHECK: vpunpckhdq (%rcx){1to16}, %zmm4, %zmm6 +// CHECK: encoding: [0x62,0xf1,0x5d,0x58,0x6a,0x31] + vpunpckhdq (%rcx){1to16}, %zmm4, %zmm6 + +// CHECK: vpunpckhdq 8128(%rdx), %zmm4, %zmm6 +// CHECK: encoding: [0x62,0xf1,0x5d,0x48,0x6a,0x72,0x7f] + vpunpckhdq 8128(%rdx), %zmm4, %zmm6 + +// CHECK: vpunpckhdq 8192(%rdx), %zmm4, %zmm6 +// CHECK: encoding: [0x62,0xf1,0x5d,0x48,0x6a,0xb2,0x00,0x20,0x00,0x00] + vpunpckhdq 8192(%rdx), %zmm4, %zmm6 + +// CHECK: vpunpckhdq -8192(%rdx), %zmm4, %zmm6 +// CHECK: encoding: [0x62,0xf1,0x5d,0x48,0x6a,0x72,0x80] + vpunpckhdq -8192(%rdx), %zmm4, %zmm6 + +// CHECK: vpunpckhdq -8256(%rdx), %zmm4, %zmm6 +// CHECK: encoding: [0x62,0xf1,0x5d,0x48,0x6a,0xb2,0xc0,0xdf,0xff,0xff] + vpunpckhdq -8256(%rdx), %zmm4, %zmm6 + +// CHECK: vpunpckhdq 508(%rdx){1to16}, %zmm4, %zmm6 +// CHECK: encoding: [0x62,0xf1,0x5d,0x58,0x6a,0x72,0x7f] + vpunpckhdq 508(%rdx){1to16}, %zmm4, %zmm6 + +// CHECK: vpunpckhdq 512(%rdx){1to16}, %zmm4, %zmm6 +// CHECK: encoding: [0x62,0xf1,0x5d,0x58,0x6a,0xb2,0x00,0x02,0x00,0x00] + vpunpckhdq 512(%rdx){1to16}, %zmm4, %zmm6 + +// CHECK: vpunpckhdq -512(%rdx){1to16}, %zmm4, %zmm6 +// CHECK: encoding: [0x62,0xf1,0x5d,0x58,0x6a,0x72,0x80] + vpunpckhdq -512(%rdx){1to16}, %zmm4, %zmm6 + +// CHECK: vpunpckhdq -516(%rdx){1to16}, %zmm4, %zmm6 +// CHECK: encoding: [0x62,0xf1,0x5d,0x58,0x6a,0xb2,0xfc,0xfd,0xff,0xff] + vpunpckhdq -516(%rdx){1to16}, %zmm4, %zmm6 + +// CHECK: vpunpcklqdq %zmm17, %zmm4, %zmm3 +// CHECK: encoding: [0x62,0xb1,0xdd,0x48,0x6c,0xd9] + vpunpcklqdq %zmm17, %zmm4, %zmm3 + +// CHECK: vpunpcklqdq %zmm17, %zmm4, %zmm3 {%k1} +// CHECK: encoding: [0x62,0xb1,0xdd,0x49,0x6c,0xd9] + vpunpcklqdq %zmm17, %zmm4, %zmm3 {%k1} + +// CHECK: vpunpcklqdq %zmm17, %zmm4, %zmm3 {%k1} {z} +// CHECK: encoding: [0x62,0xb1,0xdd,0xc9,0x6c,0xd9] + vpunpcklqdq %zmm17, %zmm4, %zmm3 {%k1} {z} + +// CHECK: vpunpcklqdq (%rcx), %zmm4, %zmm3 +// CHECK: encoding: [0x62,0xf1,0xdd,0x48,0x6c,0x19] + vpunpcklqdq (%rcx), %zmm4, %zmm3 + +// CHECK: vpunpcklqdq 291(%rax,%r14,8), %zmm4, %zmm3 +// CHECK: encoding: [0x62,0xb1,0xdd,0x48,0x6c,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpunpcklqdq 291(%rax,%r14,8), %zmm4, %zmm3 + +// CHECK: vpunpcklqdq (%rcx){1to8}, %zmm4, %zmm3 +// CHECK: encoding: [0x62,0xf1,0xdd,0x58,0x6c,0x19] + vpunpcklqdq (%rcx){1to8}, %zmm4, %zmm3 + +// CHECK: vpunpcklqdq 8128(%rdx), %zmm4, %zmm3 +// CHECK: encoding: [0x62,0xf1,0xdd,0x48,0x6c,0x5a,0x7f] + vpunpcklqdq 8128(%rdx), %zmm4, %zmm3 + +// CHECK: vpunpcklqdq 8192(%rdx), %zmm4, %zmm3 +// CHECK: encoding: [0x62,0xf1,0xdd,0x48,0x6c,0x9a,0x00,0x20,0x00,0x00] + vpunpcklqdq 8192(%rdx), %zmm4, %zmm3 + +// CHECK: vpunpcklqdq -8192(%rdx), %zmm4, %zmm3 +// CHECK: encoding: [0x62,0xf1,0xdd,0x48,0x6c,0x5a,0x80] + vpunpcklqdq -8192(%rdx), %zmm4, %zmm3 + +// CHECK: vpunpcklqdq -8256(%rdx), %zmm4, %zmm3 +// CHECK: encoding: [0x62,0xf1,0xdd,0x48,0x6c,0x9a,0xc0,0xdf,0xff,0xff] + vpunpcklqdq -8256(%rdx), %zmm4, %zmm3 + +// CHECK: vpunpcklqdq 1016(%rdx){1to8}, %zmm4, %zmm3 +// CHECK: encoding: [0x62,0xf1,0xdd,0x58,0x6c,0x5a,0x7f] + vpunpcklqdq 1016(%rdx){1to8}, %zmm4, %zmm3 + +// CHECK: vpunpcklqdq 1024(%rdx){1to8}, %zmm4, %zmm3 +// CHECK: encoding: [0x62,0xf1,0xdd,0x58,0x6c,0x9a,0x00,0x04,0x00,0x00] + vpunpcklqdq 1024(%rdx){1to8}, %zmm4, %zmm3 + +// CHECK: vpunpcklqdq -1024(%rdx){1to8}, %zmm4, %zmm3 +// CHECK: encoding: [0x62,0xf1,0xdd,0x58,0x6c,0x5a,0x80] + vpunpcklqdq -1024(%rdx){1to8}, %zmm4, %zmm3 + +// CHECK: vpunpcklqdq -1032(%rdx){1to8}, %zmm4, %zmm3 +// CHECK: encoding: [0x62,0xf1,0xdd,0x58,0x6c,0x9a,0xf8,0xfb,0xff,0xff] + vpunpcklqdq -1032(%rdx){1to8}, %zmm4, %zmm3 + +// CHECK: vpunpckhqdq %zmm16, %zmm15, %zmm27 +// CHECK: encoding: [0x62,0x21,0x85,0x48,0x6d,0xd8] + vpunpckhqdq %zmm16, %zmm15, %zmm27 + +// CHECK: vpunpckhqdq %zmm16, %zmm15, %zmm27 {%k3} +// CHECK: encoding: [0x62,0x21,0x85,0x4b,0x6d,0xd8] + vpunpckhqdq %zmm16, %zmm15, %zmm27 {%k3} + +// CHECK: vpunpckhqdq %zmm16, %zmm15, %zmm27 {%k3} {z} +// CHECK: encoding: [0x62,0x21,0x85,0xcb,0x6d,0xd8] + vpunpckhqdq %zmm16, %zmm15, %zmm27 {%k3} {z} + +// CHECK: vpunpckhqdq (%rcx), %zmm15, %zmm27 +// CHECK: encoding: [0x62,0x61,0x85,0x48,0x6d,0x19] + vpunpckhqdq (%rcx), %zmm15, %zmm27 + +// CHECK: vpunpckhqdq 291(%rax,%r14,8), %zmm15, %zmm27 +// CHECK: encoding: [0x62,0x21,0x85,0x48,0x6d,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpunpckhqdq 291(%rax,%r14,8), %zmm15, %zmm27 + +// CHECK: vpunpckhqdq (%rcx){1to8}, %zmm15, %zmm27 +// CHECK: encoding: [0x62,0x61,0x85,0x58,0x6d,0x19] + vpunpckhqdq (%rcx){1to8}, %zmm15, %zmm27 + +// CHECK: vpunpckhqdq 8128(%rdx), %zmm15, %zmm27 +// CHECK: encoding: [0x62,0x61,0x85,0x48,0x6d,0x5a,0x7f] + vpunpckhqdq 8128(%rdx), %zmm15, %zmm27 + +// CHECK: vpunpckhqdq 8192(%rdx), %zmm15, %zmm27 +// CHECK: encoding: [0x62,0x61,0x85,0x48,0x6d,0x9a,0x00,0x20,0x00,0x00] + vpunpckhqdq 8192(%rdx), %zmm15, %zmm27 + +// CHECK: vpunpckhqdq -8192(%rdx), %zmm15, %zmm27 +// CHECK: encoding: [0x62,0x61,0x85,0x48,0x6d,0x5a,0x80] + vpunpckhqdq -8192(%rdx), %zmm15, %zmm27 + +// CHECK: vpunpckhqdq -8256(%rdx), %zmm15, %zmm27 +// CHECK: encoding: [0x62,0x61,0x85,0x48,0x6d,0x9a,0xc0,0xdf,0xff,0xff] + vpunpckhqdq -8256(%rdx), %zmm15, %zmm27 + +// CHECK: vpunpckhqdq 1016(%rdx){1to8}, %zmm15, %zmm27 +// CHECK: encoding: [0x62,0x61,0x85,0x58,0x6d,0x5a,0x7f] + vpunpckhqdq 1016(%rdx){1to8}, %zmm15, %zmm27 + +// CHECK: vpunpckhqdq 1024(%rdx){1to8}, %zmm15, %zmm27 +// CHECK: encoding: [0x62,0x61,0x85,0x58,0x6d,0x9a,0x00,0x04,0x00,0x00] + vpunpckhqdq 1024(%rdx){1to8}, %zmm15, %zmm27 + +// CHECK: vpunpckhqdq -1024(%rdx){1to8}, %zmm15, %zmm27 +// CHECK: encoding: [0x62,0x61,0x85,0x58,0x6d,0x5a,0x80] + vpunpckhqdq -1024(%rdx){1to8}, %zmm15, %zmm27 + +// CHECK: vpunpckhqdq -1032(%rdx){1to8}, %zmm15, %zmm27 +// CHECK: encoding: [0x62,0x61,0x85,0x58,0x6d,0x9a,0xf8,0xfb,0xff,0xff] + vpunpckhqdq -1032(%rdx){1to8}, %zmm15, %zmm27 + Index: test/MC/X86/x86-64-avx512bw.s =================================================================== --- test/MC/X86/x86-64-avx512bw.s +++ test/MC/X86/x86-64-avx512bw.s @@ -3968,3 +3968,147 @@ // CHECK: encoding: [0x62,0x61,0x4d,0x40,0xf5,0x92,0xc0,0xdf,0xff,0xff] vpmaddwd -8256(%rdx), %zmm22, %zmm26 +// CHECK: vpunpcklbw %zmm24, %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x01,0x75,0x40,0x60,0xe0] + vpunpcklbw %zmm24, %zmm17, %zmm28 + +// CHECK: vpunpcklbw %zmm24, %zmm17, %zmm28 {%k1} +// CHECK: encoding: [0x62,0x01,0x75,0x41,0x60,0xe0] + vpunpcklbw %zmm24, %zmm17, %zmm28 {%k1} + +// CHECK: vpunpcklbw %zmm24, %zmm17, %zmm28 {%k1} {z} +// CHECK: encoding: [0x62,0x01,0x75,0xc1,0x60,0xe0] + vpunpcklbw %zmm24, %zmm17, %zmm28 {%k1} {z} + +// CHECK: vpunpcklbw (%rcx), %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x75,0x40,0x60,0x21] + vpunpcklbw (%rcx), %zmm17, %zmm28 + +// CHECK: vpunpcklbw 291(%rax,%r14,8), %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x21,0x75,0x40,0x60,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpunpcklbw 291(%rax,%r14,8), %zmm17, %zmm28 + +// CHECK: vpunpcklbw 8128(%rdx), %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x75,0x40,0x60,0x62,0x7f] + vpunpcklbw 8128(%rdx), %zmm17, %zmm28 + +// CHECK: vpunpcklbw 8192(%rdx), %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x75,0x40,0x60,0xa2,0x00,0x20,0x00,0x00] + vpunpcklbw 8192(%rdx), %zmm17, %zmm28 + +// CHECK: vpunpcklbw -8192(%rdx), %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x75,0x40,0x60,0x62,0x80] + vpunpcklbw -8192(%rdx), %zmm17, %zmm28 + +// CHECK: vpunpcklbw -8256(%rdx), %zmm17, %zmm28 +// CHECK: encoding: [0x62,0x61,0x75,0x40,0x60,0xa2,0xc0,0xdf,0xff,0xff] + vpunpcklbw -8256(%rdx), %zmm17, %zmm28 + +// CHECK: vpunpckhbw %zmm23, %zmm19, %zmm30 +// CHECK: encoding: [0x62,0x21,0x65,0x40,0x68,0xf7] + vpunpckhbw %zmm23, %zmm19, %zmm30 + +// CHECK: vpunpckhbw %zmm23, %zmm19, %zmm30 {%k7} +// CHECK: encoding: [0x62,0x21,0x65,0x47,0x68,0xf7] + vpunpckhbw %zmm23, %zmm19, %zmm30 {%k7} + +// CHECK: vpunpckhbw %zmm23, %zmm19, %zmm30 {%k7} {z} +// CHECK: encoding: [0x62,0x21,0x65,0xc7,0x68,0xf7] + vpunpckhbw %zmm23, %zmm19, %zmm30 {%k7} {z} + +// CHECK: vpunpckhbw (%rcx), %zmm19, %zmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x40,0x68,0x31] + vpunpckhbw (%rcx), %zmm19, %zmm30 + +// CHECK: vpunpckhbw 291(%rax,%r14,8), %zmm19, %zmm30 +// CHECK: encoding: [0x62,0x21,0x65,0x40,0x68,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpunpckhbw 291(%rax,%r14,8), %zmm19, %zmm30 + +// CHECK: vpunpckhbw 8128(%rdx), %zmm19, %zmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x40,0x68,0x72,0x7f] + vpunpckhbw 8128(%rdx), %zmm19, %zmm30 + +// CHECK: vpunpckhbw 8192(%rdx), %zmm19, %zmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x40,0x68,0xb2,0x00,0x20,0x00,0x00] + vpunpckhbw 8192(%rdx), %zmm19, %zmm30 + +// CHECK: vpunpckhbw -8192(%rdx), %zmm19, %zmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x40,0x68,0x72,0x80] + vpunpckhbw -8192(%rdx), %zmm19, %zmm30 + +// CHECK: vpunpckhbw -8256(%rdx), %zmm19, %zmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x40,0x68,0xb2,0xc0,0xdf,0xff,0xff] + vpunpckhbw -8256(%rdx), %zmm19, %zmm30 + +// CHECK: vpunpcklwd %zmm18, %zmm24, %zmm20 +// CHECK: encoding: [0x62,0xa1,0x3d,0x40,0x61,0xe2] + vpunpcklwd %zmm18, %zmm24, %zmm20 + +// CHECK: vpunpcklwd %zmm18, %zmm24, %zmm20 {%k7} +// CHECK: encoding: [0x62,0xa1,0x3d,0x47,0x61,0xe2] + vpunpcklwd %zmm18, %zmm24, %zmm20 {%k7} + +// CHECK: vpunpcklwd %zmm18, %zmm24, %zmm20 {%k7} {z} +// CHECK: encoding: [0x62,0xa1,0x3d,0xc7,0x61,0xe2] + vpunpcklwd %zmm18, %zmm24, %zmm20 {%k7} {z} + +// CHECK: vpunpcklwd (%rcx), %zmm24, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0x61,0x21] + vpunpcklwd (%rcx), %zmm24, %zmm20 + +// CHECK: vpunpcklwd 291(%rax,%r14,8), %zmm24, %zmm20 +// CHECK: encoding: [0x62,0xa1,0x3d,0x40,0x61,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpunpcklwd 291(%rax,%r14,8), %zmm24, %zmm20 + +// CHECK: vpunpcklwd 8128(%rdx), %zmm24, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0x61,0x62,0x7f] + vpunpcklwd 8128(%rdx), %zmm24, %zmm20 + +// CHECK: vpunpcklwd 8192(%rdx), %zmm24, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0x61,0xa2,0x00,0x20,0x00,0x00] + vpunpcklwd 8192(%rdx), %zmm24, %zmm20 + +// CHECK: vpunpcklwd -8192(%rdx), %zmm24, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0x61,0x62,0x80] + vpunpcklwd -8192(%rdx), %zmm24, %zmm20 + +// CHECK: vpunpcklwd -8256(%rdx), %zmm24, %zmm20 +// CHECK: encoding: [0x62,0xe1,0x3d,0x40,0x61,0xa2,0xc0,0xdf,0xff,0xff] + vpunpcklwd -8256(%rdx), %zmm24, %zmm20 + +// CHECK: vpunpckhwd %zmm24, %zmm26, %zmm30 +// CHECK: encoding: [0x62,0x01,0x2d,0x40,0x69,0xf0] + vpunpckhwd %zmm24, %zmm26, %zmm30 + +// CHECK: vpunpckhwd %zmm24, %zmm26, %zmm30 {%k4} +// CHECK: encoding: [0x62,0x01,0x2d,0x44,0x69,0xf0] + vpunpckhwd %zmm24, %zmm26, %zmm30 {%k4} + +// CHECK: vpunpckhwd %zmm24, %zmm26, %zmm30 {%k4} {z} +// CHECK: encoding: [0x62,0x01,0x2d,0xc4,0x69,0xf0] + vpunpckhwd %zmm24, %zmm26, %zmm30 {%k4} {z} + +// CHECK: vpunpckhwd (%rcx), %zmm26, %zmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x40,0x69,0x31] + vpunpckhwd (%rcx), %zmm26, %zmm30 + +// CHECK: vpunpckhwd 291(%rax,%r14,8), %zmm26, %zmm30 +// CHECK: encoding: [0x62,0x21,0x2d,0x40,0x69,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpunpckhwd 291(%rax,%r14,8), %zmm26, %zmm30 + +// CHECK: vpunpckhwd 8128(%rdx), %zmm26, %zmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x40,0x69,0x72,0x7f] + vpunpckhwd 8128(%rdx), %zmm26, %zmm30 + +// CHECK: vpunpckhwd 8192(%rdx), %zmm26, %zmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x40,0x69,0xb2,0x00,0x20,0x00,0x00] + vpunpckhwd 8192(%rdx), %zmm26, %zmm30 + +// CHECK: vpunpckhwd -8192(%rdx), %zmm26, %zmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x40,0x69,0x72,0x80] + vpunpckhwd -8192(%rdx), %zmm26, %zmm30 + +// CHECK: vpunpckhwd -8256(%rdx), %zmm26, %zmm30 +// CHECK: encoding: [0x62,0x61,0x2d,0x40,0x69,0xb2,0xc0,0xdf,0xff,0xff] + vpunpckhwd -8256(%rdx), %zmm26, %zmm30 + Index: test/MC/X86/x86-64-avx512bw_vl.s =================================================================== --- test/MC/X86/x86-64-avx512bw_vl.s +++ test/MC/X86/x86-64-avx512bw_vl.s @@ -7567,3 +7567,579 @@ // CHECK: encoding: [0x62,0xe1,0x5d,0x20,0xf5,0x9a,0xe0,0xef,0xff,0xff] vpmaddwd -4128(%rdx), %ymm20, %ymm19 +// CHECK: vpunpcklbw %xmm20, %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x21,0x65,0x00,0x60,0xf4] + vpunpcklbw %xmm20, %xmm19, %xmm30 + +// CHECK: vpunpcklbw %xmm20, %xmm19, %xmm30 {%k4} +// CHECK: encoding: [0x62,0x21,0x65,0x04,0x60,0xf4] + vpunpcklbw %xmm20, %xmm19, %xmm30 {%k4} + +// CHECK: vpunpcklbw %xmm20, %xmm19, %xmm30 {%k4} {z} +// CHECK: encoding: [0x62,0x21,0x65,0x84,0x60,0xf4] + vpunpcklbw %xmm20, %xmm19, %xmm30 {%k4} {z} + +// CHECK: vpunpcklbw (%rcx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0x60,0x31] + vpunpcklbw (%rcx), %xmm19, %xmm30 + +// CHECK: vpunpcklbw 4660(%rax,%r14,8), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x21,0x65,0x00,0x60,0xb4,0xf0,0x34,0x12,0x00,0x00] + vpunpcklbw 4660(%rax,%r14,8), %xmm19, %xmm30 + +// CHECK: vpunpcklbw 2032(%rdx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0x60,0x72,0x7f] + vpunpcklbw 2032(%rdx), %xmm19, %xmm30 + +// CHECK: vpunpcklbw 2048(%rdx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0x60,0xb2,0x00,0x08,0x00,0x00] + vpunpcklbw 2048(%rdx), %xmm19, %xmm30 + +// CHECK: vpunpcklbw -2048(%rdx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0x60,0x72,0x80] + vpunpcklbw -2048(%rdx), %xmm19, %xmm30 + +// CHECK: vpunpcklbw -2064(%rdx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x65,0x00,0x60,0xb2,0xf0,0xf7,0xff,0xff] + vpunpcklbw -2064(%rdx), %xmm19, %xmm30 + +// CHECK: vpunpcklbw %ymm22, %ymm28, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x1d,0x20,0x60,0xe6] + vpunpcklbw %ymm22, %ymm28, %ymm20 + +// CHECK: vpunpcklbw %ymm22, %ymm28, %ymm20 {%k1} +// CHECK: encoding: [0x62,0xa1,0x1d,0x21,0x60,0xe6] + vpunpcklbw %ymm22, %ymm28, %ymm20 {%k1} + +// CHECK: vpunpcklbw %ymm22, %ymm28, %ymm20 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0x1d,0xa1,0x60,0xe6] + vpunpcklbw %ymm22, %ymm28, %ymm20 {%k1} {z} + +// CHECK: vpunpcklbw (%rcx), %ymm28, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x1d,0x20,0x60,0x21] + vpunpcklbw (%rcx), %ymm28, %ymm20 + +// CHECK: vpunpcklbw 4660(%rax,%r14,8), %ymm28, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x1d,0x20,0x60,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpunpcklbw 4660(%rax,%r14,8), %ymm28, %ymm20 + +// CHECK: vpunpcklbw 4064(%rdx), %ymm28, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x1d,0x20,0x60,0x62,0x7f] + vpunpcklbw 4064(%rdx), %ymm28, %ymm20 + +// CHECK: vpunpcklbw 4096(%rdx), %ymm28, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x1d,0x20,0x60,0xa2,0x00,0x10,0x00,0x00] + vpunpcklbw 4096(%rdx), %ymm28, %ymm20 + +// CHECK: vpunpcklbw -4096(%rdx), %ymm28, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x1d,0x20,0x60,0x62,0x80] + vpunpcklbw -4096(%rdx), %ymm28, %ymm20 + +// CHECK: vpunpcklbw -4128(%rdx), %ymm28, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x1d,0x20,0x60,0xa2,0xe0,0xef,0xff,0xff] + vpunpcklbw -4128(%rdx), %ymm28, %ymm20 + +// CHECK: vpunpckhbw %xmm27, %xmm22, %xmm18 +// CHECK: encoding: [0x62,0x81,0x4d,0x00,0x68,0xd3] + vpunpckhbw %xmm27, %xmm22, %xmm18 + +// CHECK: vpunpckhbw %xmm27, %xmm22, %xmm18 {%k1} +// CHECK: encoding: [0x62,0x81,0x4d,0x01,0x68,0xd3] + vpunpckhbw %xmm27, %xmm22, %xmm18 {%k1} + +// CHECK: vpunpckhbw %xmm27, %xmm22, %xmm18 {%k1} {z} +// CHECK: encoding: [0x62,0x81,0x4d,0x81,0x68,0xd3] + vpunpckhbw %xmm27, %xmm22, %xmm18 {%k1} {z} + +// CHECK: vpunpckhbw (%rcx), %xmm22, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0x68,0x11] + vpunpckhbw (%rcx), %xmm22, %xmm18 + +// CHECK: vpunpckhbw 4660(%rax,%r14,8), %xmm22, %xmm18 +// CHECK: encoding: [0x62,0xa1,0x4d,0x00,0x68,0x94,0xf0,0x34,0x12,0x00,0x00] + vpunpckhbw 4660(%rax,%r14,8), %xmm22, %xmm18 + +// CHECK: vpunpckhbw 2032(%rdx), %xmm22, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0x68,0x52,0x7f] + vpunpckhbw 2032(%rdx), %xmm22, %xmm18 + +// CHECK: vpunpckhbw 2048(%rdx), %xmm22, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0x68,0x92,0x00,0x08,0x00,0x00] + vpunpckhbw 2048(%rdx), %xmm22, %xmm18 + +// CHECK: vpunpckhbw -2048(%rdx), %xmm22, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0x68,0x52,0x80] + vpunpckhbw -2048(%rdx), %xmm22, %xmm18 + +// CHECK: vpunpckhbw -2064(%rdx), %xmm22, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x4d,0x00,0x68,0x92,0xf0,0xf7,0xff,0xff] + vpunpckhbw -2064(%rdx), %xmm22, %xmm18 + +// CHECK: vpunpckhbw %ymm24, %ymm20, %ymm17 +// CHECK: encoding: [0x62,0x81,0x5d,0x20,0x68,0xc8] + vpunpckhbw %ymm24, %ymm20, %ymm17 + +// CHECK: vpunpckhbw %ymm24, %ymm20, %ymm17 {%k5} +// CHECK: encoding: [0x62,0x81,0x5d,0x25,0x68,0xc8] + vpunpckhbw %ymm24, %ymm20, %ymm17 {%k5} + +// CHECK: vpunpckhbw %ymm24, %ymm20, %ymm17 {%k5} {z} +// CHECK: encoding: [0x62,0x81,0x5d,0xa5,0x68,0xc8] + vpunpckhbw %ymm24, %ymm20, %ymm17 {%k5} {z} + +// CHECK: vpunpckhbw (%rcx), %ymm20, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0x68,0x09] + vpunpckhbw (%rcx), %ymm20, %ymm17 + +// CHECK: vpunpckhbw 4660(%rax,%r14,8), %ymm20, %ymm17 +// CHECK: encoding: [0x62,0xa1,0x5d,0x20,0x68,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpunpckhbw 4660(%rax,%r14,8), %ymm20, %ymm17 + +// CHECK: vpunpckhbw 4064(%rdx), %ymm20, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0x68,0x4a,0x7f] + vpunpckhbw 4064(%rdx), %ymm20, %ymm17 + +// CHECK: vpunpckhbw 4096(%rdx), %ymm20, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0x68,0x8a,0x00,0x10,0x00,0x00] + vpunpckhbw 4096(%rdx), %ymm20, %ymm17 + +// CHECK: vpunpckhbw -4096(%rdx), %ymm20, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0x68,0x4a,0x80] + vpunpckhbw -4096(%rdx), %ymm20, %ymm17 + +// CHECK: vpunpckhbw -4128(%rdx), %ymm20, %ymm17 +// CHECK: encoding: [0x62,0xe1,0x5d,0x20,0x68,0x8a,0xe0,0xef,0xff,0xff] + vpunpckhbw -4128(%rdx), %ymm20, %ymm17 + +// CHECK: vpunpcklwd %xmm17, %xmm27, %xmm27 +// CHECK: encoding: [0x62,0x21,0x25,0x00,0x61,0xd9] + vpunpcklwd %xmm17, %xmm27, %xmm27 + +// CHECK: vpunpcklwd %xmm17, %xmm27, %xmm27 {%k5} +// CHECK: encoding: [0x62,0x21,0x25,0x05,0x61,0xd9] + vpunpcklwd %xmm17, %xmm27, %xmm27 {%k5} + +// CHECK: vpunpcklwd %xmm17, %xmm27, %xmm27 {%k5} {z} +// CHECK: encoding: [0x62,0x21,0x25,0x85,0x61,0xd9] + vpunpcklwd %xmm17, %xmm27, %xmm27 {%k5} {z} + +// CHECK: vpunpcklwd (%rcx), %xmm27, %xmm27 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0x61,0x19] + vpunpcklwd (%rcx), %xmm27, %xmm27 + +// CHECK: vpunpcklwd 4660(%rax,%r14,8), %xmm27, %xmm27 +// CHECK: encoding: [0x62,0x21,0x25,0x00,0x61,0x9c,0xf0,0x34,0x12,0x00,0x00] + vpunpcklwd 4660(%rax,%r14,8), %xmm27, %xmm27 + +// CHECK: vpunpcklwd 2032(%rdx), %xmm27, %xmm27 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0x61,0x5a,0x7f] + vpunpcklwd 2032(%rdx), %xmm27, %xmm27 + +// CHECK: vpunpcklwd 2048(%rdx), %xmm27, %xmm27 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0x61,0x9a,0x00,0x08,0x00,0x00] + vpunpcklwd 2048(%rdx), %xmm27, %xmm27 + +// CHECK: vpunpcklwd -2048(%rdx), %xmm27, %xmm27 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0x61,0x5a,0x80] + vpunpcklwd -2048(%rdx), %xmm27, %xmm27 + +// CHECK: vpunpcklwd -2064(%rdx), %xmm27, %xmm27 +// CHECK: encoding: [0x62,0x61,0x25,0x00,0x61,0x9a,0xf0,0xf7,0xff,0xff] + vpunpcklwd -2064(%rdx), %xmm27, %xmm27 + +// CHECK: vpunpcklwd %ymm23, %ymm25, %ymm18 +// CHECK: encoding: [0x62,0xa1,0x35,0x20,0x61,0xd7] + vpunpcklwd %ymm23, %ymm25, %ymm18 + +// CHECK: vpunpcklwd %ymm23, %ymm25, %ymm18 {%k5} +// CHECK: encoding: [0x62,0xa1,0x35,0x25,0x61,0xd7] + vpunpcklwd %ymm23, %ymm25, %ymm18 {%k5} + +// CHECK: vpunpcklwd %ymm23, %ymm25, %ymm18 {%k5} {z} +// CHECK: encoding: [0x62,0xa1,0x35,0xa5,0x61,0xd7] + vpunpcklwd %ymm23, %ymm25, %ymm18 {%k5} {z} + +// CHECK: vpunpcklwd (%rcx), %ymm25, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0x61,0x11] + vpunpcklwd (%rcx), %ymm25, %ymm18 + +// CHECK: vpunpcklwd 4660(%rax,%r14,8), %ymm25, %ymm18 +// CHECK: encoding: [0x62,0xa1,0x35,0x20,0x61,0x94,0xf0,0x34,0x12,0x00,0x00] + vpunpcklwd 4660(%rax,%r14,8), %ymm25, %ymm18 + +// CHECK: vpunpcklwd 4064(%rdx), %ymm25, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0x61,0x52,0x7f] + vpunpcklwd 4064(%rdx), %ymm25, %ymm18 + +// CHECK: vpunpcklwd 4096(%rdx), %ymm25, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0x61,0x92,0x00,0x10,0x00,0x00] + vpunpcklwd 4096(%rdx), %ymm25, %ymm18 + +// CHECK: vpunpcklwd -4096(%rdx), %ymm25, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0x61,0x52,0x80] + vpunpcklwd -4096(%rdx), %ymm25, %ymm18 + +// CHECK: vpunpcklwd -4128(%rdx), %ymm25, %ymm18 +// CHECK: encoding: [0x62,0xe1,0x35,0x20,0x61,0x92,0xe0,0xef,0xff,0xff] + vpunpcklwd -4128(%rdx), %ymm25, %ymm18 + +// CHECK: vpunpckhwd %xmm17, %xmm28, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x1d,0x00,0x69,0xc9] + vpunpckhwd %xmm17, %xmm28, %xmm17 + +// CHECK: vpunpckhwd %xmm17, %xmm28, %xmm17 {%k7} +// CHECK: encoding: [0x62,0xa1,0x1d,0x07,0x69,0xc9] + vpunpckhwd %xmm17, %xmm28, %xmm17 {%k7} + +// CHECK: vpunpckhwd %xmm17, %xmm28, %xmm17 {%k7} {z} +// CHECK: encoding: [0x62,0xa1,0x1d,0x87,0x69,0xc9] + vpunpckhwd %xmm17, %xmm28, %xmm17 {%k7} {z} + +// CHECK: vpunpckhwd (%rcx), %xmm28, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0x69,0x09] + vpunpckhwd (%rcx), %xmm28, %xmm17 + +// CHECK: vpunpckhwd 4660(%rax,%r14,8), %xmm28, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x1d,0x00,0x69,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpunpckhwd 4660(%rax,%r14,8), %xmm28, %xmm17 + +// CHECK: vpunpckhwd 2032(%rdx), %xmm28, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0x69,0x4a,0x7f] + vpunpckhwd 2032(%rdx), %xmm28, %xmm17 + +// CHECK: vpunpckhwd 2048(%rdx), %xmm28, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0x69,0x8a,0x00,0x08,0x00,0x00] + vpunpckhwd 2048(%rdx), %xmm28, %xmm17 + +// CHECK: vpunpckhwd -2048(%rdx), %xmm28, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0x69,0x4a,0x80] + vpunpckhwd -2048(%rdx), %xmm28, %xmm17 + +// CHECK: vpunpckhwd -2064(%rdx), %xmm28, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x1d,0x00,0x69,0x8a,0xf0,0xf7,0xff,0xff] + vpunpckhwd -2064(%rdx), %xmm28, %xmm17 + +// CHECK: vpunpckhwd %ymm20, %ymm25, %ymm24 +// CHECK: encoding: [0x62,0x21,0x35,0x20,0x69,0xc4] + vpunpckhwd %ymm20, %ymm25, %ymm24 + +// CHECK: vpunpckhwd %ymm20, %ymm25, %ymm24 {%k1} +// CHECK: encoding: [0x62,0x21,0x35,0x21,0x69,0xc4] + vpunpckhwd %ymm20, %ymm25, %ymm24 {%k1} + +// CHECK: vpunpckhwd %ymm20, %ymm25, %ymm24 {%k1} {z} +// CHECK: encoding: [0x62,0x21,0x35,0xa1,0x69,0xc4] + vpunpckhwd %ymm20, %ymm25, %ymm24 {%k1} {z} + +// CHECK: vpunpckhwd (%rcx), %ymm25, %ymm24 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x01] + vpunpckhwd (%rcx), %ymm25, %ymm24 + +// CHECK: vpunpckhwd 4660(%rax,%r14,8), %ymm25, %ymm24 +// CHECK: encoding: [0x62,0x21,0x35,0x20,0x69,0x84,0xf0,0x34,0x12,0x00,0x00] + vpunpckhwd 4660(%rax,%r14,8), %ymm25, %ymm24 + +// CHECK: vpunpckhwd 4064(%rdx), %ymm25, %ymm24 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x42,0x7f] + vpunpckhwd 4064(%rdx), %ymm25, %ymm24 + +// CHECK: vpunpckhwd 4096(%rdx), %ymm25, %ymm24 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x82,0x00,0x10,0x00,0x00] + vpunpckhwd 4096(%rdx), %ymm25, %ymm24 + +// CHECK: vpunpckhwd -4096(%rdx), %ymm25, %ymm24 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x42,0x80] + vpunpckhwd -4096(%rdx), %ymm25, %ymm24 + +// CHECK: vpunpckhwd -4128(%rdx), %ymm25, %ymm24 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x82,0xe0,0xef,0xff,0xff] + vpunpckhwd -4128(%rdx), %ymm25, %ymm24 + +// CHECK: vpunpcklbw %xmm23, %xmm29, %xmm18 +// CHECK: encoding: [0x62,0xa1,0x15,0x00,0x60,0xd7] + vpunpcklbw %xmm23, %xmm29, %xmm18 + +// CHECK: vpunpcklbw %xmm23, %xmm29, %xmm18 {%k4} +// CHECK: encoding: [0x62,0xa1,0x15,0x04,0x60,0xd7] + vpunpcklbw %xmm23, %xmm29, %xmm18 {%k4} + +// CHECK: vpunpcklbw %xmm23, %xmm29, %xmm18 {%k4} {z} +// CHECK: encoding: [0x62,0xa1,0x15,0x84,0x60,0xd7] + vpunpcklbw %xmm23, %xmm29, %xmm18 {%k4} {z} + +// CHECK: vpunpcklbw (%rcx), %xmm29, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x15,0x00,0x60,0x11] + vpunpcklbw (%rcx), %xmm29, %xmm18 + +// CHECK: vpunpcklbw 291(%rax,%r14,8), %xmm29, %xmm18 +// CHECK: encoding: [0x62,0xa1,0x15,0x00,0x60,0x94,0xf0,0x23,0x01,0x00,0x00] + vpunpcklbw 291(%rax,%r14,8), %xmm29, %xmm18 + +// CHECK: vpunpcklbw 2032(%rdx), %xmm29, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x15,0x00,0x60,0x52,0x7f] + vpunpcklbw 2032(%rdx), %xmm29, %xmm18 + +// CHECK: vpunpcklbw 2048(%rdx), %xmm29, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x15,0x00,0x60,0x92,0x00,0x08,0x00,0x00] + vpunpcklbw 2048(%rdx), %xmm29, %xmm18 + +// CHECK: vpunpcklbw -2048(%rdx), %xmm29, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x15,0x00,0x60,0x52,0x80] + vpunpcklbw -2048(%rdx), %xmm29, %xmm18 + +// CHECK: vpunpcklbw -2064(%rdx), %xmm29, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x15,0x00,0x60,0x92,0xf0,0xf7,0xff,0xff] + vpunpcklbw -2064(%rdx), %xmm29, %xmm18 + +// CHECK: vpunpcklbw %ymm21, %ymm28, %ymm27 +// CHECK: encoding: [0x62,0x21,0x1d,0x20,0x60,0xdd] + vpunpcklbw %ymm21, %ymm28, %ymm27 + +// CHECK: vpunpcklbw %ymm21, %ymm28, %ymm27 {%k4} +// CHECK: encoding: [0x62,0x21,0x1d,0x24,0x60,0xdd] + vpunpcklbw %ymm21, %ymm28, %ymm27 {%k4} + +// CHECK: vpunpcklbw %ymm21, %ymm28, %ymm27 {%k4} {z} +// CHECK: encoding: [0x62,0x21,0x1d,0xa4,0x60,0xdd] + vpunpcklbw %ymm21, %ymm28, %ymm27 {%k4} {z} + +// CHECK: vpunpcklbw (%rcx), %ymm28, %ymm27 +// CHECK: encoding: [0x62,0x61,0x1d,0x20,0x60,0x19] + vpunpcklbw (%rcx), %ymm28, %ymm27 + +// CHECK: vpunpcklbw 291(%rax,%r14,8), %ymm28, %ymm27 +// CHECK: encoding: [0x62,0x21,0x1d,0x20,0x60,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpunpcklbw 291(%rax,%r14,8), %ymm28, %ymm27 + +// CHECK: vpunpcklbw 4064(%rdx), %ymm28, %ymm27 +// CHECK: encoding: [0x62,0x61,0x1d,0x20,0x60,0x5a,0x7f] + vpunpcklbw 4064(%rdx), %ymm28, %ymm27 + +// CHECK: vpunpcklbw 4096(%rdx), %ymm28, %ymm27 +// CHECK: encoding: [0x62,0x61,0x1d,0x20,0x60,0x9a,0x00,0x10,0x00,0x00] + vpunpcklbw 4096(%rdx), %ymm28, %ymm27 + +// CHECK: vpunpcklbw -4096(%rdx), %ymm28, %ymm27 +// CHECK: encoding: [0x62,0x61,0x1d,0x20,0x60,0x5a,0x80] + vpunpcklbw -4096(%rdx), %ymm28, %ymm27 + +// CHECK: vpunpcklbw -4128(%rdx), %ymm28, %ymm27 +// CHECK: encoding: [0x62,0x61,0x1d,0x20,0x60,0x9a,0xe0,0xef,0xff,0xff] + vpunpcklbw -4128(%rdx), %ymm28, %ymm27 + +// CHECK: vpunpckhbw %xmm24, %xmm21, %xmm18 +// CHECK: encoding: [0x62,0x81,0x55,0x00,0x68,0xd0] + vpunpckhbw %xmm24, %xmm21, %xmm18 + +// CHECK: vpunpckhbw %xmm24, %xmm21, %xmm18 {%k6} +// CHECK: encoding: [0x62,0x81,0x55,0x06,0x68,0xd0] + vpunpckhbw %xmm24, %xmm21, %xmm18 {%k6} + +// CHECK: vpunpckhbw %xmm24, %xmm21, %xmm18 {%k6} {z} +// CHECK: encoding: [0x62,0x81,0x55,0x86,0x68,0xd0] + vpunpckhbw %xmm24, %xmm21, %xmm18 {%k6} {z} + +// CHECK: vpunpckhbw (%rcx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x55,0x00,0x68,0x11] + vpunpckhbw (%rcx), %xmm21, %xmm18 + +// CHECK: vpunpckhbw 291(%rax,%r14,8), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xa1,0x55,0x00,0x68,0x94,0xf0,0x23,0x01,0x00,0x00] + vpunpckhbw 291(%rax,%r14,8), %xmm21, %xmm18 + +// CHECK: vpunpckhbw 2032(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x55,0x00,0x68,0x52,0x7f] + vpunpckhbw 2032(%rdx), %xmm21, %xmm18 + +// CHECK: vpunpckhbw 2048(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x55,0x00,0x68,0x92,0x00,0x08,0x00,0x00] + vpunpckhbw 2048(%rdx), %xmm21, %xmm18 + +// CHECK: vpunpckhbw -2048(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x55,0x00,0x68,0x52,0x80] + vpunpckhbw -2048(%rdx), %xmm21, %xmm18 + +// CHECK: vpunpckhbw -2064(%rdx), %xmm21, %xmm18 +// CHECK: encoding: [0x62,0xe1,0x55,0x00,0x68,0x92,0xf0,0xf7,0xff,0xff] + vpunpckhbw -2064(%rdx), %xmm21, %xmm18 + +// CHECK: vpunpckhbw %ymm23, %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x2d,0x20,0x68,0xe7] + vpunpckhbw %ymm23, %ymm26, %ymm20 + +// CHECK: vpunpckhbw %ymm23, %ymm26, %ymm20 {%k1} +// CHECK: encoding: [0x62,0xa1,0x2d,0x21,0x68,0xe7] + vpunpckhbw %ymm23, %ymm26, %ymm20 {%k1} + +// CHECK: vpunpckhbw %ymm23, %ymm26, %ymm20 {%k1} {z} +// CHECK: encoding: [0x62,0xa1,0x2d,0xa1,0x68,0xe7] + vpunpckhbw %ymm23, %ymm26, %ymm20 {%k1} {z} + +// CHECK: vpunpckhbw (%rcx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0x68,0x21] + vpunpckhbw (%rcx), %ymm26, %ymm20 + +// CHECK: vpunpckhbw 291(%rax,%r14,8), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x2d,0x20,0x68,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpunpckhbw 291(%rax,%r14,8), %ymm26, %ymm20 + +// CHECK: vpunpckhbw 4064(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0x68,0x62,0x7f] + vpunpckhbw 4064(%rdx), %ymm26, %ymm20 + +// CHECK: vpunpckhbw 4096(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0x68,0xa2,0x00,0x10,0x00,0x00] + vpunpckhbw 4096(%rdx), %ymm26, %ymm20 + +// CHECK: vpunpckhbw -4096(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0x68,0x62,0x80] + vpunpckhbw -4096(%rdx), %ymm26, %ymm20 + +// CHECK: vpunpckhbw -4128(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x2d,0x20,0x68,0xa2,0xe0,0xef,0xff,0xff] + vpunpckhbw -4128(%rdx), %ymm26, %ymm20 + +// CHECK: vpunpcklwd %xmm21, %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xa1,0x35,0x00,0x61,0xed] + vpunpcklwd %xmm21, %xmm25, %xmm21 + +// CHECK: vpunpcklwd %xmm21, %xmm25, %xmm21 {%k6} +// CHECK: encoding: [0x62,0xa1,0x35,0x06,0x61,0xed] + vpunpcklwd %xmm21, %xmm25, %xmm21 {%k6} + +// CHECK: vpunpcklwd %xmm21, %xmm25, %xmm21 {%k6} {z} +// CHECK: encoding: [0x62,0xa1,0x35,0x86,0x61,0xed] + vpunpcklwd %xmm21, %xmm25, %xmm21 {%k6} {z} + +// CHECK: vpunpcklwd (%rcx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x35,0x00,0x61,0x29] + vpunpcklwd (%rcx), %xmm25, %xmm21 + +// CHECK: vpunpcklwd 291(%rax,%r14,8), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xa1,0x35,0x00,0x61,0xac,0xf0,0x23,0x01,0x00,0x00] + vpunpcklwd 291(%rax,%r14,8), %xmm25, %xmm21 + +// CHECK: vpunpcklwd 2032(%rdx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x35,0x00,0x61,0x6a,0x7f] + vpunpcklwd 2032(%rdx), %xmm25, %xmm21 + +// CHECK: vpunpcklwd 2048(%rdx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x35,0x00,0x61,0xaa,0x00,0x08,0x00,0x00] + vpunpcklwd 2048(%rdx), %xmm25, %xmm21 + +// CHECK: vpunpcklwd -2048(%rdx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x35,0x00,0x61,0x6a,0x80] + vpunpcklwd -2048(%rdx), %xmm25, %xmm21 + +// CHECK: vpunpcklwd -2064(%rdx), %xmm25, %xmm21 +// CHECK: encoding: [0x62,0xe1,0x35,0x00,0x61,0xaa,0xf0,0xf7,0xff,0xff] + vpunpcklwd -2064(%rdx), %xmm25, %xmm21 + +// CHECK: vpunpcklwd %ymm26, %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x01,0x45,0x20,0x61,0xd2] + vpunpcklwd %ymm26, %ymm23, %ymm26 + +// CHECK: vpunpcklwd %ymm26, %ymm23, %ymm26 {%k2} +// CHECK: encoding: [0x62,0x01,0x45,0x22,0x61,0xd2] + vpunpcklwd %ymm26, %ymm23, %ymm26 {%k2} + +// CHECK: vpunpcklwd %ymm26, %ymm23, %ymm26 {%k2} {z} +// CHECK: encoding: [0x62,0x01,0x45,0xa2,0x61,0xd2] + vpunpcklwd %ymm26, %ymm23, %ymm26 {%k2} {z} + +// CHECK: vpunpcklwd (%rcx), %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x61,0x45,0x20,0x61,0x11] + vpunpcklwd (%rcx), %ymm23, %ymm26 + +// CHECK: vpunpcklwd 291(%rax,%r14,8), %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x21,0x45,0x20,0x61,0x94,0xf0,0x23,0x01,0x00,0x00] + vpunpcklwd 291(%rax,%r14,8), %ymm23, %ymm26 + +// CHECK: vpunpcklwd 4064(%rdx), %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x61,0x45,0x20,0x61,0x52,0x7f] + vpunpcklwd 4064(%rdx), %ymm23, %ymm26 + +// CHECK: vpunpcklwd 4096(%rdx), %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x61,0x45,0x20,0x61,0x92,0x00,0x10,0x00,0x00] + vpunpcklwd 4096(%rdx), %ymm23, %ymm26 + +// CHECK: vpunpcklwd -4096(%rdx), %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x61,0x45,0x20,0x61,0x52,0x80] + vpunpcklwd -4096(%rdx), %ymm23, %ymm26 + +// CHECK: vpunpcklwd -4128(%rdx), %ymm23, %ymm26 +// CHECK: encoding: [0x62,0x61,0x45,0x20,0x61,0x92,0xe0,0xef,0xff,0xff] + vpunpcklwd -4128(%rdx), %ymm23, %ymm26 + +// CHECK: vpunpckhwd %xmm23, %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x6d,0x00,0x69,0xcf] + vpunpckhwd %xmm23, %xmm18, %xmm17 + +// CHECK: vpunpckhwd %xmm23, %xmm18, %xmm17 {%k3} +// CHECK: encoding: [0x62,0xa1,0x6d,0x03,0x69,0xcf] + vpunpckhwd %xmm23, %xmm18, %xmm17 {%k3} + +// CHECK: vpunpckhwd %xmm23, %xmm18, %xmm17 {%k3} {z} +// CHECK: encoding: [0x62,0xa1,0x6d,0x83,0x69,0xcf] + vpunpckhwd %xmm23, %xmm18, %xmm17 {%k3} {z} + +// CHECK: vpunpckhwd (%rcx), %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0x69,0x09] + vpunpckhwd (%rcx), %xmm18, %xmm17 + +// CHECK: vpunpckhwd 291(%rax,%r14,8), %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xa1,0x6d,0x00,0x69,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpunpckhwd 291(%rax,%r14,8), %xmm18, %xmm17 + +// CHECK: vpunpckhwd 2032(%rdx), %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0x69,0x4a,0x7f] + vpunpckhwd 2032(%rdx), %xmm18, %xmm17 + +// CHECK: vpunpckhwd 2048(%rdx), %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0x69,0x8a,0x00,0x08,0x00,0x00] + vpunpckhwd 2048(%rdx), %xmm18, %xmm17 + +// CHECK: vpunpckhwd -2048(%rdx), %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0x69,0x4a,0x80] + vpunpckhwd -2048(%rdx), %xmm18, %xmm17 + +// CHECK: vpunpckhwd -2064(%rdx), %xmm18, %xmm17 +// CHECK: encoding: [0x62,0xe1,0x6d,0x00,0x69,0x8a,0xf0,0xf7,0xff,0xff] + vpunpckhwd -2064(%rdx), %xmm18, %xmm17 + +// CHECK: vpunpckhwd %ymm26, %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x01,0x35,0x20,0x69,0xe2] + vpunpckhwd %ymm26, %ymm25, %ymm28 + +// CHECK: vpunpckhwd %ymm26, %ymm25, %ymm28 {%k4} +// CHECK: encoding: [0x62,0x01,0x35,0x24,0x69,0xe2] + vpunpckhwd %ymm26, %ymm25, %ymm28 {%k4} + +// CHECK: vpunpckhwd %ymm26, %ymm25, %ymm28 {%k4} {z} +// CHECK: encoding: [0x62,0x01,0x35,0xa4,0x69,0xe2] + vpunpckhwd %ymm26, %ymm25, %ymm28 {%k4} {z} + +// CHECK: vpunpckhwd (%rcx), %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x21] + vpunpckhwd (%rcx), %ymm25, %ymm28 + +// CHECK: vpunpckhwd 291(%rax,%r14,8), %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x21,0x35,0x20,0x69,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpunpckhwd 291(%rax,%r14,8), %ymm25, %ymm28 + +// CHECK: vpunpckhwd 4064(%rdx), %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x62,0x7f] + vpunpckhwd 4064(%rdx), %ymm25, %ymm28 + +// CHECK: vpunpckhwd 4096(%rdx), %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0xa2,0x00,0x10,0x00,0x00] + vpunpckhwd 4096(%rdx), %ymm25, %ymm28 + +// CHECK: vpunpckhwd -4096(%rdx), %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0x62,0x80] + vpunpckhwd -4096(%rdx), %ymm25, %ymm28 + +// CHECK: vpunpckhwd -4128(%rdx), %ymm25, %ymm28 +// CHECK: encoding: [0x62,0x61,0x35,0x20,0x69,0xa2,0xe0,0xef,0xff,0xff] + vpunpckhwd -4128(%rdx), %ymm25, %ymm28 + Index: test/MC/X86/x86-64-avx512f_vl.s =================================================================== --- test/MC/X86/x86-64-avx512f_vl.s +++ test/MC/X86/x86-64-avx512f_vl.s @@ -18843,3 +18843,899 @@ // CHECK: vcvtdq2ps -516(%rdx){1to8}, %ymm24 // CHECK: encoding: [0x62,0x61,0x7c,0x38,0x5b,0x82,0xfc,0xfd,0xff,0xff] vcvtdq2ps -516(%rdx){1to8}, %ymm24 +// CHECK: vunpckhps %xmm27, %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x01,0x64,0x00,0x15,0xf3] + vunpckhps %xmm27, %xmm19, %xmm30 + +// CHECK: vunpckhps %xmm27, %xmm19, %xmm30 {%k2} +// CHECK: encoding: [0x62,0x01,0x64,0x02,0x15,0xf3] + vunpckhps %xmm27, %xmm19, %xmm30 {%k2} + +// CHECK: vunpckhps %xmm27, %xmm19, %xmm30 {%k2} {z} +// CHECK: encoding: [0x62,0x01,0x64,0x82,0x15,0xf3] + vunpckhps %xmm27, %xmm19, %xmm30 {%k2} {z} + +// CHECK: vunpckhps (%rcx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x64,0x00,0x15,0x31] + vunpckhps (%rcx), %xmm19, %xmm30 + +// CHECK: vunpckhps 291(%rax,%r14,8), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x21,0x64,0x00,0x15,0xb4,0xf0,0x23,0x01,0x00,0x00] + vunpckhps 291(%rax,%r14,8), %xmm19, %xmm30 + +// CHECK: vunpckhps (%rcx){1to4}, %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x64,0x10,0x15,0x31] + vunpckhps (%rcx){1to4}, %xmm19, %xmm30 + +// CHECK: vunpckhps 2032(%rdx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x64,0x00,0x15,0x72,0x7f] + vunpckhps 2032(%rdx), %xmm19, %xmm30 + +// CHECK: vunpckhps 2048(%rdx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x64,0x00,0x15,0xb2,0x00,0x08,0x00,0x00] + vunpckhps 2048(%rdx), %xmm19, %xmm30 + +// CHECK: vunpckhps -2048(%rdx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x64,0x00,0x15,0x72,0x80] + vunpckhps -2048(%rdx), %xmm19, %xmm30 + +// CHECK: vunpckhps -2064(%rdx), %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x64,0x00,0x15,0xb2,0xf0,0xf7,0xff,0xff] + vunpckhps -2064(%rdx), %xmm19, %xmm30 + +// CHECK: vunpckhps 508(%rdx){1to4}, %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x64,0x10,0x15,0x72,0x7f] + vunpckhps 508(%rdx){1to4}, %xmm19, %xmm30 + +// CHECK: vunpckhps 512(%rdx){1to4}, %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x64,0x10,0x15,0xb2,0x00,0x02,0x00,0x00] + vunpckhps 512(%rdx){1to4}, %xmm19, %xmm30 + +// CHECK: vunpckhps -512(%rdx){1to4}, %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x64,0x10,0x15,0x72,0x80] + vunpckhps -512(%rdx){1to4}, %xmm19, %xmm30 + +// CHECK: vunpckhps -516(%rdx){1to4}, %xmm19, %xmm30 +// CHECK: encoding: [0x62,0x61,0x64,0x10,0x15,0xb2,0xfc,0xfd,0xff,0xff] + vunpckhps -516(%rdx){1to4}, %xmm19, %xmm30 + +// CHECK: vunpckhps %ymm25, %ymm25, %ymm21 +// CHECK: encoding: [0x62,0x81,0x34,0x20,0x15,0xe9] + vunpckhps %ymm25, %ymm25, %ymm21 + +// CHECK: vunpckhps %ymm25, %ymm25, %ymm21 {%k5} +// CHECK: encoding: [0x62,0x81,0x34,0x25,0x15,0xe9] + vunpckhps %ymm25, %ymm25, %ymm21 {%k5} + +// CHECK: vunpckhps %ymm25, %ymm25, %ymm21 {%k5} {z} +// CHECK: encoding: [0x62,0x81,0x34,0xa5,0x15,0xe9] + vunpckhps %ymm25, %ymm25, %ymm21 {%k5} {z} + +// CHECK: vunpckhps (%rcx), %ymm25, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x15,0x29] + vunpckhps (%rcx), %ymm25, %ymm21 + +// CHECK: vunpckhps 291(%rax,%r14,8), %ymm25, %ymm21 +// CHECK: encoding: [0x62,0xa1,0x34,0x20,0x15,0xac,0xf0,0x23,0x01,0x00,0x00] + vunpckhps 291(%rax,%r14,8), %ymm25, %ymm21 + +// CHECK: vunpckhps (%rcx){1to8}, %ymm25, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x15,0x29] + vunpckhps (%rcx){1to8}, %ymm25, %ymm21 + +// CHECK: vunpckhps 4064(%rdx), %ymm25, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x15,0x6a,0x7f] + vunpckhps 4064(%rdx), %ymm25, %ymm21 + +// CHECK: vunpckhps 4096(%rdx), %ymm25, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x15,0xaa,0x00,0x10,0x00,0x00] + vunpckhps 4096(%rdx), %ymm25, %ymm21 + +// CHECK: vunpckhps -4096(%rdx), %ymm25, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x15,0x6a,0x80] + vunpckhps -4096(%rdx), %ymm25, %ymm21 + +// CHECK: vunpckhps -4128(%rdx), %ymm25, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x34,0x20,0x15,0xaa,0xe0,0xef,0xff,0xff] + vunpckhps -4128(%rdx), %ymm25, %ymm21 + +// CHECK: vunpckhps 508(%rdx){1to8}, %ymm25, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x15,0x6a,0x7f] + vunpckhps 508(%rdx){1to8}, %ymm25, %ymm21 + +// CHECK: vunpckhps 512(%rdx){1to8}, %ymm25, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x15,0xaa,0x00,0x02,0x00,0x00] + vunpckhps 512(%rdx){1to8}, %ymm25, %ymm21 + +// CHECK: vunpckhps -512(%rdx){1to8}, %ymm25, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x15,0x6a,0x80] + vunpckhps -512(%rdx){1to8}, %ymm25, %ymm21 + +// CHECK: vunpckhps -516(%rdx){1to8}, %ymm25, %ymm21 +// CHECK: encoding: [0x62,0xe1,0x34,0x30,0x15,0xaa,0xfc,0xfd,0xff,0xff] + vunpckhps -516(%rdx){1to8}, %ymm25, %ymm21 + +// CHECK: vunpcklps %xmm26, %xmm24, %xmm29 +// CHECK: encoding: [0x62,0x01,0x3c,0x00,0x14,0xea] + vunpcklps %xmm26, %xmm24, %xmm29 + +// CHECK: vunpcklps %xmm26, %xmm24, %xmm29 {%k6} +// CHECK: encoding: [0x62,0x01,0x3c,0x06,0x14,0xea] + vunpcklps %xmm26, %xmm24, %xmm29 {%k6} + +// CHECK: vunpcklps %xmm26, %xmm24, %xmm29 {%k6} {z} +// CHECK: encoding: [0x62,0x01,0x3c,0x86,0x14,0xea] + vunpcklps %xmm26, %xmm24, %xmm29 {%k6} {z} + +// CHECK: vunpcklps (%rcx), %xmm24, %xmm29 +// CHECK: encoding: [0x62,0x61,0x3c,0x00,0x14,0x29] + vunpcklps (%rcx), %xmm24, %xmm29 + +// CHECK: vunpcklps 291(%rax,%r14,8), %xmm24, %xmm29 +// CHECK: encoding: [0x62,0x21,0x3c,0x00,0x14,0xac,0xf0,0x23,0x01,0x00,0x00] + vunpcklps 291(%rax,%r14,8), %xmm24, %xmm29 + +// CHECK: vunpcklps (%rcx){1to4}, %xmm24, %xmm29 +// CHECK: encoding: [0x62,0x61,0x3c,0x10,0x14,0x29] + vunpcklps (%rcx){1to4}, %xmm24, %xmm29 + +// CHECK: vunpcklps 2032(%rdx), %xmm24, %xmm29 +// CHECK: encoding: [0x62,0x61,0x3c,0x00,0x14,0x6a,0x7f] + vunpcklps 2032(%rdx), %xmm24, %xmm29 + +// CHECK: vunpcklps 2048(%rdx), %xmm24, %xmm29 +// CHECK: encoding: [0x62,0x61,0x3c,0x00,0x14,0xaa,0x00,0x08,0x00,0x00] + vunpcklps 2048(%rdx), %xmm24, %xmm29 + +// CHECK: vunpcklps -2048(%rdx), %xmm24, %xmm29 +// CHECK: encoding: [0x62,0x61,0x3c,0x00,0x14,0x6a,0x80] + vunpcklps -2048(%rdx), %xmm24, %xmm29 + +// CHECK: vunpcklps -2064(%rdx), %xmm24, %xmm29 +// CHECK: encoding: [0x62,0x61,0x3c,0x00,0x14,0xaa,0xf0,0xf7,0xff,0xff] + vunpcklps -2064(%rdx), %xmm24, %xmm29 + +// CHECK: vunpcklps 508(%rdx){1to4}, %xmm24, %xmm29 +// CHECK: encoding: [0x62,0x61,0x3c,0x10,0x14,0x6a,0x7f] + vunpcklps 508(%rdx){1to4}, %xmm24, %xmm29 + +// CHECK: vunpcklps 512(%rdx){1to4}, %xmm24, %xmm29 +// CHECK: encoding: [0x62,0x61,0x3c,0x10,0x14,0xaa,0x00,0x02,0x00,0x00] + vunpcklps 512(%rdx){1to4}, %xmm24, %xmm29 + +// CHECK: vunpcklps -512(%rdx){1to4}, %xmm24, %xmm29 +// CHECK: encoding: [0x62,0x61,0x3c,0x10,0x14,0x6a,0x80] + vunpcklps -512(%rdx){1to4}, %xmm24, %xmm29 + +// CHECK: vunpcklps -516(%rdx){1to4}, %xmm24, %xmm29 +// CHECK: encoding: [0x62,0x61,0x3c,0x10,0x14,0xaa,0xfc,0xfd,0xff,0xff] + vunpcklps -516(%rdx){1to4}, %xmm24, %xmm29 + +// CHECK: vunpcklps %ymm18, %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x21,0x6c,0x20,0x14,0xd2] + vunpcklps %ymm18, %ymm18, %ymm26 + +// CHECK: vunpcklps %ymm18, %ymm18, %ymm26 {%k1} +// CHECK: encoding: [0x62,0x21,0x6c,0x21,0x14,0xd2] + vunpcklps %ymm18, %ymm18, %ymm26 {%k1} + +// CHECK: vunpcklps %ymm18, %ymm18, %ymm26 {%k1} {z} +// CHECK: encoding: [0x62,0x21,0x6c,0xa1,0x14,0xd2] + vunpcklps %ymm18, %ymm18, %ymm26 {%k1} {z} + +// CHECK: vunpcklps (%rcx), %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x14,0x11] + vunpcklps (%rcx), %ymm18, %ymm26 + +// CHECK: vunpcklps 291(%rax,%r14,8), %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x21,0x6c,0x20,0x14,0x94,0xf0,0x23,0x01,0x00,0x00] + vunpcklps 291(%rax,%r14,8), %ymm18, %ymm26 + +// CHECK: vunpcklps (%rcx){1to8}, %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x14,0x11] + vunpcklps (%rcx){1to8}, %ymm18, %ymm26 + +// CHECK: vunpcklps 4064(%rdx), %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x14,0x52,0x7f] + vunpcklps 4064(%rdx), %ymm18, %ymm26 + +// CHECK: vunpcklps 4096(%rdx), %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x14,0x92,0x00,0x10,0x00,0x00] + vunpcklps 4096(%rdx), %ymm18, %ymm26 + +// CHECK: vunpcklps -4096(%rdx), %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x14,0x52,0x80] + vunpcklps -4096(%rdx), %ymm18, %ymm26 + +// CHECK: vunpcklps -4128(%rdx), %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x20,0x14,0x92,0xe0,0xef,0xff,0xff] + vunpcklps -4128(%rdx), %ymm18, %ymm26 + +// CHECK: vunpcklps 508(%rdx){1to8}, %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x14,0x52,0x7f] + vunpcklps 508(%rdx){1to8}, %ymm18, %ymm26 + +// CHECK: vunpcklps 512(%rdx){1to8}, %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x14,0x92,0x00,0x02,0x00,0x00] + vunpcklps 512(%rdx){1to8}, %ymm18, %ymm26 + +// CHECK: vunpcklps -512(%rdx){1to8}, %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x14,0x52,0x80] + vunpcklps -512(%rdx){1to8}, %ymm18, %ymm26 + +// CHECK: vunpcklps -516(%rdx){1to8}, %ymm18, %ymm26 +// CHECK: encoding: [0x62,0x61,0x6c,0x30,0x14,0x92,0xfc,0xfd,0xff,0xff] + vunpcklps -516(%rdx){1to8}, %ymm18, %ymm26 + +// CHECK: vunpckhpd %xmm26, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0x81,0xb5,0x00,0x15,0xda] + vunpckhpd %xmm26, %xmm25, %xmm19 + +// CHECK: vunpckhpd %xmm26, %xmm25, %xmm19 {%k3} +// CHECK: encoding: [0x62,0x81,0xb5,0x03,0x15,0xda] + vunpckhpd %xmm26, %xmm25, %xmm19 {%k3} + +// CHECK: vunpckhpd %xmm26, %xmm25, %xmm19 {%k3} {z} +// CHECK: encoding: [0x62,0x81,0xb5,0x83,0x15,0xda] + vunpckhpd %xmm26, %xmm25, %xmm19 {%k3} {z} + +// CHECK: vunpckhpd (%rcx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x15,0x19] + vunpckhpd (%rcx), %xmm25, %xmm19 + +// CHECK: vunpckhpd 291(%rax,%r14,8), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xa1,0xb5,0x00,0x15,0x9c,0xf0,0x23,0x01,0x00,0x00] + vunpckhpd 291(%rax,%r14,8), %xmm25, %xmm19 + +// CHECK: vunpckhpd (%rcx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x15,0x19] + vunpckhpd (%rcx){1to2}, %xmm25, %xmm19 + +// CHECK: vunpckhpd 2032(%rdx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x15,0x5a,0x7f] + vunpckhpd 2032(%rdx), %xmm25, %xmm19 + +// CHECK: vunpckhpd 2048(%rdx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x15,0x9a,0x00,0x08,0x00,0x00] + vunpckhpd 2048(%rdx), %xmm25, %xmm19 + +// CHECK: vunpckhpd -2048(%rdx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x15,0x5a,0x80] + vunpckhpd -2048(%rdx), %xmm25, %xmm19 + +// CHECK: vunpckhpd -2064(%rdx), %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x00,0x15,0x9a,0xf0,0xf7,0xff,0xff] + vunpckhpd -2064(%rdx), %xmm25, %xmm19 + +// CHECK: vunpckhpd 1016(%rdx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x15,0x5a,0x7f] + vunpckhpd 1016(%rdx){1to2}, %xmm25, %xmm19 + +// CHECK: vunpckhpd 1024(%rdx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x15,0x9a,0x00,0x04,0x00,0x00] + vunpckhpd 1024(%rdx){1to2}, %xmm25, %xmm19 + +// CHECK: vunpckhpd -1024(%rdx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x15,0x5a,0x80] + vunpckhpd -1024(%rdx){1to2}, %xmm25, %xmm19 + +// CHECK: vunpckhpd -1032(%rdx){1to2}, %xmm25, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xb5,0x10,0x15,0x9a,0xf8,0xfb,0xff,0xff] + vunpckhpd -1032(%rdx){1to2}, %xmm25, %xmm19 + +// CHECK: vunpckhpd %ymm24, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x01,0xcd,0x20,0x15,0xc8] + vunpckhpd %ymm24, %ymm22, %ymm25 + +// CHECK: vunpckhpd %ymm24, %ymm22, %ymm25 {%k7} +// CHECK: encoding: [0x62,0x01,0xcd,0x27,0x15,0xc8] + vunpckhpd %ymm24, %ymm22, %ymm25 {%k7} + +// CHECK: vunpckhpd %ymm24, %ymm22, %ymm25 {%k7} {z} +// CHECK: encoding: [0x62,0x01,0xcd,0xa7,0x15,0xc8] + vunpckhpd %ymm24, %ymm22, %ymm25 {%k7} {z} + +// CHECK: vunpckhpd (%rcx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x61,0xcd,0x20,0x15,0x09] + vunpckhpd (%rcx), %ymm22, %ymm25 + +// CHECK: vunpckhpd 291(%rax,%r14,8), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x21,0xcd,0x20,0x15,0x8c,0xf0,0x23,0x01,0x00,0x00] + vunpckhpd 291(%rax,%r14,8), %ymm22, %ymm25 + +// CHECK: vunpckhpd (%rcx){1to4}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x61,0xcd,0x30,0x15,0x09] + vunpckhpd (%rcx){1to4}, %ymm22, %ymm25 + +// CHECK: vunpckhpd 4064(%rdx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x61,0xcd,0x20,0x15,0x4a,0x7f] + vunpckhpd 4064(%rdx), %ymm22, %ymm25 + +// CHECK: vunpckhpd 4096(%rdx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x61,0xcd,0x20,0x15,0x8a,0x00,0x10,0x00,0x00] + vunpckhpd 4096(%rdx), %ymm22, %ymm25 + +// CHECK: vunpckhpd -4096(%rdx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x61,0xcd,0x20,0x15,0x4a,0x80] + vunpckhpd -4096(%rdx), %ymm22, %ymm25 + +// CHECK: vunpckhpd -4128(%rdx), %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x61,0xcd,0x20,0x15,0x8a,0xe0,0xef,0xff,0xff] + vunpckhpd -4128(%rdx), %ymm22, %ymm25 + +// CHECK: vunpckhpd 1016(%rdx){1to4}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x61,0xcd,0x30,0x15,0x4a,0x7f] + vunpckhpd 1016(%rdx){1to4}, %ymm22, %ymm25 + +// CHECK: vunpckhpd 1024(%rdx){1to4}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x61,0xcd,0x30,0x15,0x8a,0x00,0x04,0x00,0x00] + vunpckhpd 1024(%rdx){1to4}, %ymm22, %ymm25 + +// CHECK: vunpckhpd -1024(%rdx){1to4}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x61,0xcd,0x30,0x15,0x4a,0x80] + vunpckhpd -1024(%rdx){1to4}, %ymm22, %ymm25 + +// CHECK: vunpckhpd -1032(%rdx){1to4}, %ymm22, %ymm25 +// CHECK: encoding: [0x62,0x61,0xcd,0x30,0x15,0x8a,0xf8,0xfb,0xff,0xff] + vunpckhpd -1032(%rdx){1to4}, %ymm22, %ymm25 + +// CHECK: vunpcklpd %xmm18, %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x21,0xad,0x00,0x14,0xf2] + vunpcklpd %xmm18, %xmm26, %xmm30 + +// CHECK: vunpcklpd %xmm18, %xmm26, %xmm30 {%k5} +// CHECK: encoding: [0x62,0x21,0xad,0x05,0x14,0xf2] + vunpcklpd %xmm18, %xmm26, %xmm30 {%k5} + +// CHECK: vunpcklpd %xmm18, %xmm26, %xmm30 {%k5} {z} +// CHECK: encoding: [0x62,0x21,0xad,0x85,0x14,0xf2] + vunpcklpd %xmm18, %xmm26, %xmm30 {%k5} {z} + +// CHECK: vunpcklpd (%rcx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0xad,0x00,0x14,0x31] + vunpcklpd (%rcx), %xmm26, %xmm30 + +// CHECK: vunpcklpd 291(%rax,%r14,8), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x21,0xad,0x00,0x14,0xb4,0xf0,0x23,0x01,0x00,0x00] + vunpcklpd 291(%rax,%r14,8), %xmm26, %xmm30 + +// CHECK: vunpcklpd (%rcx){1to2}, %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0xad,0x10,0x14,0x31] + vunpcklpd (%rcx){1to2}, %xmm26, %xmm30 + +// CHECK: vunpcklpd 2032(%rdx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0xad,0x00,0x14,0x72,0x7f] + vunpcklpd 2032(%rdx), %xmm26, %xmm30 + +// CHECK: vunpcklpd 2048(%rdx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0xad,0x00,0x14,0xb2,0x00,0x08,0x00,0x00] + vunpcklpd 2048(%rdx), %xmm26, %xmm30 + +// CHECK: vunpcklpd -2048(%rdx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0xad,0x00,0x14,0x72,0x80] + vunpcklpd -2048(%rdx), %xmm26, %xmm30 + +// CHECK: vunpcklpd -2064(%rdx), %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0xad,0x00,0x14,0xb2,0xf0,0xf7,0xff,0xff] + vunpcklpd -2064(%rdx), %xmm26, %xmm30 + +// CHECK: vunpcklpd 1016(%rdx){1to2}, %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0xad,0x10,0x14,0x72,0x7f] + vunpcklpd 1016(%rdx){1to2}, %xmm26, %xmm30 + +// CHECK: vunpcklpd 1024(%rdx){1to2}, %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0xad,0x10,0x14,0xb2,0x00,0x04,0x00,0x00] + vunpcklpd 1024(%rdx){1to2}, %xmm26, %xmm30 + +// CHECK: vunpcklpd -1024(%rdx){1to2}, %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0xad,0x10,0x14,0x72,0x80] + vunpcklpd -1024(%rdx){1to2}, %xmm26, %xmm30 + +// CHECK: vunpcklpd -1032(%rdx){1to2}, %xmm26, %xmm30 +// CHECK: encoding: [0x62,0x61,0xad,0x10,0x14,0xb2,0xf8,0xfb,0xff,0xff] + vunpcklpd -1032(%rdx){1to2}, %xmm26, %xmm30 + +// CHECK: vunpcklpd %ymm17, %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xa1,0xad,0x20,0x14,0xe1] + vunpcklpd %ymm17, %ymm26, %ymm20 + +// CHECK: vunpcklpd %ymm17, %ymm26, %ymm20 {%k2} +// CHECK: encoding: [0x62,0xa1,0xad,0x22,0x14,0xe1] + vunpcklpd %ymm17, %ymm26, %ymm20 {%k2} + +// CHECK: vunpcklpd %ymm17, %ymm26, %ymm20 {%k2} {z} +// CHECK: encoding: [0x62,0xa1,0xad,0xa2,0x14,0xe1] + vunpcklpd %ymm17, %ymm26, %ymm20 {%k2} {z} + +// CHECK: vunpcklpd (%rcx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x14,0x21] + vunpcklpd (%rcx), %ymm26, %ymm20 + +// CHECK: vunpcklpd 291(%rax,%r14,8), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xa1,0xad,0x20,0x14,0xa4,0xf0,0x23,0x01,0x00,0x00] + vunpcklpd 291(%rax,%r14,8), %ymm26, %ymm20 + +// CHECK: vunpcklpd (%rcx){1to4}, %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x30,0x14,0x21] + vunpcklpd (%rcx){1to4}, %ymm26, %ymm20 + +// CHECK: vunpcklpd 4064(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x14,0x62,0x7f] + vunpcklpd 4064(%rdx), %ymm26, %ymm20 + +// CHECK: vunpcklpd 4096(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x14,0xa2,0x00,0x10,0x00,0x00] + vunpcklpd 4096(%rdx), %ymm26, %ymm20 + +// CHECK: vunpcklpd -4096(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x14,0x62,0x80] + vunpcklpd -4096(%rdx), %ymm26, %ymm20 + +// CHECK: vunpcklpd -4128(%rdx), %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x20,0x14,0xa2,0xe0,0xef,0xff,0xff] + vunpcklpd -4128(%rdx), %ymm26, %ymm20 + +// CHECK: vunpcklpd 1016(%rdx){1to4}, %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x30,0x14,0x62,0x7f] + vunpcklpd 1016(%rdx){1to4}, %ymm26, %ymm20 + +// CHECK: vunpcklpd 1024(%rdx){1to4}, %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x30,0x14,0xa2,0x00,0x04,0x00,0x00] + vunpcklpd 1024(%rdx){1to4}, %ymm26, %ymm20 + +// CHECK: vunpcklpd -1024(%rdx){1to4}, %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x30,0x14,0x62,0x80] + vunpcklpd -1024(%rdx){1to4}, %ymm26, %ymm20 + +// CHECK: vunpcklpd -1032(%rdx){1to4}, %ymm26, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xad,0x30,0x14,0xa2,0xf8,0xfb,0xff,0xff] + vunpcklpd -1032(%rdx){1to4}, %ymm26, %ymm20 + +// CHECK: vpunpckldq %xmm17, %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x21,0x55,0x00,0x62,0xc9] + vpunpckldq %xmm17, %xmm21, %xmm25 + +// CHECK: vpunpckldq %xmm17, %xmm21, %xmm25 {%k7} +// CHECK: encoding: [0x62,0x21,0x55,0x07,0x62,0xc9] + vpunpckldq %xmm17, %xmm21, %xmm25 {%k7} + +// CHECK: vpunpckldq %xmm17, %xmm21, %xmm25 {%k7} {z} +// CHECK: encoding: [0x62,0x21,0x55,0x87,0x62,0xc9] + vpunpckldq %xmm17, %xmm21, %xmm25 {%k7} {z} + +// CHECK: vpunpckldq (%rcx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0x62,0x09] + vpunpckldq (%rcx), %xmm21, %xmm25 + +// CHECK: vpunpckldq 291(%rax,%r14,8), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x21,0x55,0x00,0x62,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpunpckldq 291(%rax,%r14,8), %xmm21, %xmm25 + +// CHECK: vpunpckldq (%rcx){1to4}, %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x10,0x62,0x09] + vpunpckldq (%rcx){1to4}, %xmm21, %xmm25 + +// CHECK: vpunpckldq 2032(%rdx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0x62,0x4a,0x7f] + vpunpckldq 2032(%rdx), %xmm21, %xmm25 + +// CHECK: vpunpckldq 2048(%rdx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0x62,0x8a,0x00,0x08,0x00,0x00] + vpunpckldq 2048(%rdx), %xmm21, %xmm25 + +// CHECK: vpunpckldq -2048(%rdx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0x62,0x4a,0x80] + vpunpckldq -2048(%rdx), %xmm21, %xmm25 + +// CHECK: vpunpckldq -2064(%rdx), %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x00,0x62,0x8a,0xf0,0xf7,0xff,0xff] + vpunpckldq -2064(%rdx), %xmm21, %xmm25 + +// CHECK: vpunpckldq 508(%rdx){1to4}, %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x10,0x62,0x4a,0x7f] + vpunpckldq 508(%rdx){1to4}, %xmm21, %xmm25 + +// CHECK: vpunpckldq 512(%rdx){1to4}, %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x10,0x62,0x8a,0x00,0x02,0x00,0x00] + vpunpckldq 512(%rdx){1to4}, %xmm21, %xmm25 + +// CHECK: vpunpckldq -512(%rdx){1to4}, %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x10,0x62,0x4a,0x80] + vpunpckldq -512(%rdx){1to4}, %xmm21, %xmm25 + +// CHECK: vpunpckldq -516(%rdx){1to4}, %xmm21, %xmm25 +// CHECK: encoding: [0x62,0x61,0x55,0x10,0x62,0x8a,0xfc,0xfd,0xff,0xff] + vpunpckldq -516(%rdx){1to4}, %xmm21, %xmm25 + +// CHECK: vpunpckldq %ymm26, %ymm19, %ymm20 +// CHECK: encoding: [0x62,0x81,0x65,0x20,0x62,0xe2] + vpunpckldq %ymm26, %ymm19, %ymm20 + +// CHECK: vpunpckldq %ymm26, %ymm19, %ymm20 {%k7} +// CHECK: encoding: [0x62,0x81,0x65,0x27,0x62,0xe2] + vpunpckldq %ymm26, %ymm19, %ymm20 {%k7} + +// CHECK: vpunpckldq %ymm26, %ymm19, %ymm20 {%k7} {z} +// CHECK: encoding: [0x62,0x81,0x65,0xa7,0x62,0xe2] + vpunpckldq %ymm26, %ymm19, %ymm20 {%k7} {z} + +// CHECK: vpunpckldq (%rcx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0x62,0x21] + vpunpckldq (%rcx), %ymm19, %ymm20 + +// CHECK: vpunpckldq 291(%rax,%r14,8), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xa1,0x65,0x20,0x62,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpunpckldq 291(%rax,%r14,8), %ymm19, %ymm20 + +// CHECK: vpunpckldq (%rcx){1to8}, %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x30,0x62,0x21] + vpunpckldq (%rcx){1to8}, %ymm19, %ymm20 + +// CHECK: vpunpckldq 4064(%rdx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0x62,0x62,0x7f] + vpunpckldq 4064(%rdx), %ymm19, %ymm20 + +// CHECK: vpunpckldq 4096(%rdx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0x62,0xa2,0x00,0x10,0x00,0x00] + vpunpckldq 4096(%rdx), %ymm19, %ymm20 + +// CHECK: vpunpckldq -4096(%rdx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0x62,0x62,0x80] + vpunpckldq -4096(%rdx), %ymm19, %ymm20 + +// CHECK: vpunpckldq -4128(%rdx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x20,0x62,0xa2,0xe0,0xef,0xff,0xff] + vpunpckldq -4128(%rdx), %ymm19, %ymm20 + +// CHECK: vpunpckldq 508(%rdx){1to8}, %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x30,0x62,0x62,0x7f] + vpunpckldq 508(%rdx){1to8}, %ymm19, %ymm20 + +// CHECK: vpunpckldq 512(%rdx){1to8}, %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x30,0x62,0xa2,0x00,0x02,0x00,0x00] + vpunpckldq 512(%rdx){1to8}, %ymm19, %ymm20 + +// CHECK: vpunpckldq -512(%rdx){1to8}, %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x30,0x62,0x62,0x80] + vpunpckldq -512(%rdx){1to8}, %ymm19, %ymm20 + +// CHECK: vpunpckldq -516(%rdx){1to8}, %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0x65,0x30,0x62,0xa2,0xfc,0xfd,0xff,0xff] + vpunpckldq -516(%rdx){1to8}, %ymm19, %ymm20 + +// CHECK: vpunpckhdq %xmm27, %xmm24, %xmm28 +// CHECK: encoding: [0x62,0x01,0x3d,0x00,0x6a,0xe3] + vpunpckhdq %xmm27, %xmm24, %xmm28 + +// CHECK: vpunpckhdq %xmm27, %xmm24, %xmm28 {%k7} +// CHECK: encoding: [0x62,0x01,0x3d,0x07,0x6a,0xe3] + vpunpckhdq %xmm27, %xmm24, %xmm28 {%k7} + +// CHECK: vpunpckhdq %xmm27, %xmm24, %xmm28 {%k7} {z} +// CHECK: encoding: [0x62,0x01,0x3d,0x87,0x6a,0xe3] + vpunpckhdq %xmm27, %xmm24, %xmm28 {%k7} {z} + +// CHECK: vpunpckhdq (%rcx), %xmm24, %xmm28 +// CHECK: encoding: [0x62,0x61,0x3d,0x00,0x6a,0x21] + vpunpckhdq (%rcx), %xmm24, %xmm28 + +// CHECK: vpunpckhdq 291(%rax,%r14,8), %xmm24, %xmm28 +// CHECK: encoding: [0x62,0x21,0x3d,0x00,0x6a,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpunpckhdq 291(%rax,%r14,8), %xmm24, %xmm28 + +// CHECK: vpunpckhdq (%rcx){1to4}, %xmm24, %xmm28 +// CHECK: encoding: [0x62,0x61,0x3d,0x10,0x6a,0x21] + vpunpckhdq (%rcx){1to4}, %xmm24, %xmm28 + +// CHECK: vpunpckhdq 2032(%rdx), %xmm24, %xmm28 +// CHECK: encoding: [0x62,0x61,0x3d,0x00,0x6a,0x62,0x7f] + vpunpckhdq 2032(%rdx), %xmm24, %xmm28 + +// CHECK: vpunpckhdq 2048(%rdx), %xmm24, %xmm28 +// CHECK: encoding: [0x62,0x61,0x3d,0x00,0x6a,0xa2,0x00,0x08,0x00,0x00] + vpunpckhdq 2048(%rdx), %xmm24, %xmm28 + +// CHECK: vpunpckhdq -2048(%rdx), %xmm24, %xmm28 +// CHECK: encoding: [0x62,0x61,0x3d,0x00,0x6a,0x62,0x80] + vpunpckhdq -2048(%rdx), %xmm24, %xmm28 + +// CHECK: vpunpckhdq -2064(%rdx), %xmm24, %xmm28 +// CHECK: encoding: [0x62,0x61,0x3d,0x00,0x6a,0xa2,0xf0,0xf7,0xff,0xff] + vpunpckhdq -2064(%rdx), %xmm24, %xmm28 + +// CHECK: vpunpckhdq 508(%rdx){1to4}, %xmm24, %xmm28 +// CHECK: encoding: [0x62,0x61,0x3d,0x10,0x6a,0x62,0x7f] + vpunpckhdq 508(%rdx){1to4}, %xmm24, %xmm28 + +// CHECK: vpunpckhdq 512(%rdx){1to4}, %xmm24, %xmm28 +// CHECK: encoding: [0x62,0x61,0x3d,0x10,0x6a,0xa2,0x00,0x02,0x00,0x00] + vpunpckhdq 512(%rdx){1to4}, %xmm24, %xmm28 + +// CHECK: vpunpckhdq -512(%rdx){1to4}, %xmm24, %xmm28 +// CHECK: encoding: [0x62,0x61,0x3d,0x10,0x6a,0x62,0x80] + vpunpckhdq -512(%rdx){1to4}, %xmm24, %xmm28 + +// CHECK: vpunpckhdq -516(%rdx){1to4}, %xmm24, %xmm28 +// CHECK: encoding: [0x62,0x61,0x3d,0x10,0x6a,0xa2,0xfc,0xfd,0xff,0xff] + vpunpckhdq -516(%rdx){1to4}, %xmm24, %xmm28 + +// CHECK: vpunpckhdq %ymm28, %ymm24, %ymm26 +// CHECK: encoding: [0x62,0x01,0x3d,0x20,0x6a,0xd4] + vpunpckhdq %ymm28, %ymm24, %ymm26 + +// CHECK: vpunpckhdq %ymm28, %ymm24, %ymm26 {%k2} +// CHECK: encoding: [0x62,0x01,0x3d,0x22,0x6a,0xd4] + vpunpckhdq %ymm28, %ymm24, %ymm26 {%k2} + +// CHECK: vpunpckhdq %ymm28, %ymm24, %ymm26 {%k2} {z} +// CHECK: encoding: [0x62,0x01,0x3d,0xa2,0x6a,0xd4] + vpunpckhdq %ymm28, %ymm24, %ymm26 {%k2} {z} + +// CHECK: vpunpckhdq (%rcx), %ymm24, %ymm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x20,0x6a,0x11] + vpunpckhdq (%rcx), %ymm24, %ymm26 + +// CHECK: vpunpckhdq 291(%rax,%r14,8), %ymm24, %ymm26 +// CHECK: encoding: [0x62,0x21,0x3d,0x20,0x6a,0x94,0xf0,0x23,0x01,0x00,0x00] + vpunpckhdq 291(%rax,%r14,8), %ymm24, %ymm26 + +// CHECK: vpunpckhdq (%rcx){1to8}, %ymm24, %ymm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x30,0x6a,0x11] + vpunpckhdq (%rcx){1to8}, %ymm24, %ymm26 + +// CHECK: vpunpckhdq 4064(%rdx), %ymm24, %ymm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x20,0x6a,0x52,0x7f] + vpunpckhdq 4064(%rdx), %ymm24, %ymm26 + +// CHECK: vpunpckhdq 4096(%rdx), %ymm24, %ymm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x20,0x6a,0x92,0x00,0x10,0x00,0x00] + vpunpckhdq 4096(%rdx), %ymm24, %ymm26 + +// CHECK: vpunpckhdq -4096(%rdx), %ymm24, %ymm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x20,0x6a,0x52,0x80] + vpunpckhdq -4096(%rdx), %ymm24, %ymm26 + +// CHECK: vpunpckhdq -4128(%rdx), %ymm24, %ymm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x20,0x6a,0x92,0xe0,0xef,0xff,0xff] + vpunpckhdq -4128(%rdx), %ymm24, %ymm26 + +// CHECK: vpunpckhdq 508(%rdx){1to8}, %ymm24, %ymm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x30,0x6a,0x52,0x7f] + vpunpckhdq 508(%rdx){1to8}, %ymm24, %ymm26 + +// CHECK: vpunpckhdq 512(%rdx){1to8}, %ymm24, %ymm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x30,0x6a,0x92,0x00,0x02,0x00,0x00] + vpunpckhdq 512(%rdx){1to8}, %ymm24, %ymm26 + +// CHECK: vpunpckhdq -512(%rdx){1to8}, %ymm24, %ymm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x30,0x6a,0x52,0x80] + vpunpckhdq -512(%rdx){1to8}, %ymm24, %ymm26 + +// CHECK: vpunpckhdq -516(%rdx){1to8}, %ymm24, %ymm26 +// CHECK: encoding: [0x62,0x61,0x3d,0x30,0x6a,0x92,0xfc,0xfd,0xff,0xff] + vpunpckhdq -516(%rdx){1to8}, %ymm24, %ymm26 + +// CHECK: vpunpcklqdq %xmm23, %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xa1,0xa5,0x00,0x6c,0xcf] + vpunpcklqdq %xmm23, %xmm27, %xmm17 + +// CHECK: vpunpcklqdq %xmm23, %xmm27, %xmm17 {%k6} +// CHECK: encoding: [0x62,0xa1,0xa5,0x06,0x6c,0xcf] + vpunpcklqdq %xmm23, %xmm27, %xmm17 {%k6} + +// CHECK: vpunpcklqdq %xmm23, %xmm27, %xmm17 {%k6} {z} +// CHECK: encoding: [0x62,0xa1,0xa5,0x86,0x6c,0xcf] + vpunpcklqdq %xmm23, %xmm27, %xmm17 {%k6} {z} + +// CHECK: vpunpcklqdq (%rcx), %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x6c,0x09] + vpunpcklqdq (%rcx), %xmm27, %xmm17 + +// CHECK: vpunpcklqdq 291(%rax,%r14,8), %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xa1,0xa5,0x00,0x6c,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpunpcklqdq 291(%rax,%r14,8), %xmm27, %xmm17 + +// CHECK: vpunpcklqdq (%rcx){1to2}, %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x6c,0x09] + vpunpcklqdq (%rcx){1to2}, %xmm27, %xmm17 + +// CHECK: vpunpcklqdq 2032(%rdx), %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x6c,0x4a,0x7f] + vpunpcklqdq 2032(%rdx), %xmm27, %xmm17 + +// CHECK: vpunpcklqdq 2048(%rdx), %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x6c,0x8a,0x00,0x08,0x00,0x00] + vpunpcklqdq 2048(%rdx), %xmm27, %xmm17 + +// CHECK: vpunpcklqdq -2048(%rdx), %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x6c,0x4a,0x80] + vpunpcklqdq -2048(%rdx), %xmm27, %xmm17 + +// CHECK: vpunpcklqdq -2064(%rdx), %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0xa5,0x00,0x6c,0x8a,0xf0,0xf7,0xff,0xff] + vpunpcklqdq -2064(%rdx), %xmm27, %xmm17 + +// CHECK: vpunpcklqdq 1016(%rdx){1to2}, %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x6c,0x4a,0x7f] + vpunpcklqdq 1016(%rdx){1to2}, %xmm27, %xmm17 + +// CHECK: vpunpcklqdq 1024(%rdx){1to2}, %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x6c,0x8a,0x00,0x04,0x00,0x00] + vpunpcklqdq 1024(%rdx){1to2}, %xmm27, %xmm17 + +// CHECK: vpunpcklqdq -1024(%rdx){1to2}, %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x6c,0x4a,0x80] + vpunpcklqdq -1024(%rdx){1to2}, %xmm27, %xmm17 + +// CHECK: vpunpcklqdq -1032(%rdx){1to2}, %xmm27, %xmm17 +// CHECK: encoding: [0x62,0xe1,0xa5,0x10,0x6c,0x8a,0xf8,0xfb,0xff,0xff] + vpunpcklqdq -1032(%rdx){1to2}, %xmm27, %xmm17 + +// CHECK: vpunpcklqdq %ymm28, %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x01,0xdd,0x20,0x6c,0xec] + vpunpcklqdq %ymm28, %ymm20, %ymm29 + +// CHECK: vpunpcklqdq %ymm28, %ymm20, %ymm29 {%k7} +// CHECK: encoding: [0x62,0x01,0xdd,0x27,0x6c,0xec] + vpunpcklqdq %ymm28, %ymm20, %ymm29 {%k7} + +// CHECK: vpunpcklqdq %ymm28, %ymm20, %ymm29 {%k7} {z} +// CHECK: encoding: [0x62,0x01,0xdd,0xa7,0x6c,0xec] + vpunpcklqdq %ymm28, %ymm20, %ymm29 {%k7} {z} + +// CHECK: vpunpcklqdq (%rcx), %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x6c,0x29] + vpunpcklqdq (%rcx), %ymm20, %ymm29 + +// CHECK: vpunpcklqdq 291(%rax,%r14,8), %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x21,0xdd,0x20,0x6c,0xac,0xf0,0x23,0x01,0x00,0x00] + vpunpcklqdq 291(%rax,%r14,8), %ymm20, %ymm29 + +// CHECK: vpunpcklqdq (%rcx){1to4}, %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x61,0xdd,0x30,0x6c,0x29] + vpunpcklqdq (%rcx){1to4}, %ymm20, %ymm29 + +// CHECK: vpunpcklqdq 4064(%rdx), %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x6c,0x6a,0x7f] + vpunpcklqdq 4064(%rdx), %ymm20, %ymm29 + +// CHECK: vpunpcklqdq 4096(%rdx), %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x6c,0xaa,0x00,0x10,0x00,0x00] + vpunpcklqdq 4096(%rdx), %ymm20, %ymm29 + +// CHECK: vpunpcklqdq -4096(%rdx), %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x6c,0x6a,0x80] + vpunpcklqdq -4096(%rdx), %ymm20, %ymm29 + +// CHECK: vpunpcklqdq -4128(%rdx), %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x61,0xdd,0x20,0x6c,0xaa,0xe0,0xef,0xff,0xff] + vpunpcklqdq -4128(%rdx), %ymm20, %ymm29 + +// CHECK: vpunpcklqdq 1016(%rdx){1to4}, %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x61,0xdd,0x30,0x6c,0x6a,0x7f] + vpunpcklqdq 1016(%rdx){1to4}, %ymm20, %ymm29 + +// CHECK: vpunpcklqdq 1024(%rdx){1to4}, %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x61,0xdd,0x30,0x6c,0xaa,0x00,0x04,0x00,0x00] + vpunpcklqdq 1024(%rdx){1to4}, %ymm20, %ymm29 + +// CHECK: vpunpcklqdq -1024(%rdx){1to4}, %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x61,0xdd,0x30,0x6c,0x6a,0x80] + vpunpcklqdq -1024(%rdx){1to4}, %ymm20, %ymm29 + +// CHECK: vpunpcklqdq -1032(%rdx){1to4}, %ymm20, %ymm29 +// CHECK: encoding: [0x62,0x61,0xdd,0x30,0x6c,0xaa,0xf8,0xfb,0xff,0xff] + vpunpcklqdq -1032(%rdx){1to4}, %ymm20, %ymm29 + +// CHECK: vpunpckhqdq %xmm24, %xmm19, %xmm19 +// CHECK: encoding: [0x62,0x81,0xe5,0x00,0x6d,0xd8] + vpunpckhqdq %xmm24, %xmm19, %xmm19 + +// CHECK: vpunpckhqdq %xmm24, %xmm19, %xmm19 {%k6} +// CHECK: encoding: [0x62,0x81,0xe5,0x06,0x6d,0xd8] + vpunpckhqdq %xmm24, %xmm19, %xmm19 {%k6} + +// CHECK: vpunpckhqdq %xmm24, %xmm19, %xmm19 {%k6} {z} +// CHECK: encoding: [0x62,0x81,0xe5,0x86,0x6d,0xd8] + vpunpckhqdq %xmm24, %xmm19, %xmm19 {%k6} {z} + +// CHECK: vpunpckhqdq (%rcx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xe5,0x00,0x6d,0x19] + vpunpckhqdq (%rcx), %xmm19, %xmm19 + +// CHECK: vpunpckhqdq 291(%rax,%r14,8), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xa1,0xe5,0x00,0x6d,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpunpckhqdq 291(%rax,%r14,8), %xmm19, %xmm19 + +// CHECK: vpunpckhqdq (%rcx){1to2}, %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xe5,0x10,0x6d,0x19] + vpunpckhqdq (%rcx){1to2}, %xmm19, %xmm19 + +// CHECK: vpunpckhqdq 2032(%rdx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xe5,0x00,0x6d,0x5a,0x7f] + vpunpckhqdq 2032(%rdx), %xmm19, %xmm19 + +// CHECK: vpunpckhqdq 2048(%rdx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xe5,0x00,0x6d,0x9a,0x00,0x08,0x00,0x00] + vpunpckhqdq 2048(%rdx), %xmm19, %xmm19 + +// CHECK: vpunpckhqdq -2048(%rdx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xe5,0x00,0x6d,0x5a,0x80] + vpunpckhqdq -2048(%rdx), %xmm19, %xmm19 + +// CHECK: vpunpckhqdq -2064(%rdx), %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xe5,0x00,0x6d,0x9a,0xf0,0xf7,0xff,0xff] + vpunpckhqdq -2064(%rdx), %xmm19, %xmm19 + +// CHECK: vpunpckhqdq 1016(%rdx){1to2}, %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xe5,0x10,0x6d,0x5a,0x7f] + vpunpckhqdq 1016(%rdx){1to2}, %xmm19, %xmm19 + +// CHECK: vpunpckhqdq 1024(%rdx){1to2}, %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xe5,0x10,0x6d,0x9a,0x00,0x04,0x00,0x00] + vpunpckhqdq 1024(%rdx){1to2}, %xmm19, %xmm19 + +// CHECK: vpunpckhqdq -1024(%rdx){1to2}, %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xe5,0x10,0x6d,0x5a,0x80] + vpunpckhqdq -1024(%rdx){1to2}, %xmm19, %xmm19 + +// CHECK: vpunpckhqdq -1032(%rdx){1to2}, %xmm19, %xmm19 +// CHECK: encoding: [0x62,0xe1,0xe5,0x10,0x6d,0x9a,0xf8,0xfb,0xff,0xff] + vpunpckhqdq -1032(%rdx){1to2}, %xmm19, %xmm19 + +// CHECK: vpunpckhqdq %ymm28, %ymm19, %ymm20 +// CHECK: encoding: [0x62,0x81,0xe5,0x20,0x6d,0xe4] + vpunpckhqdq %ymm28, %ymm19, %ymm20 + +// CHECK: vpunpckhqdq %ymm28, %ymm19, %ymm20 {%k6} +// CHECK: encoding: [0x62,0x81,0xe5,0x26,0x6d,0xe4] + vpunpckhqdq %ymm28, %ymm19, %ymm20 {%k6} + +// CHECK: vpunpckhqdq %ymm28, %ymm19, %ymm20 {%k6} {z} +// CHECK: encoding: [0x62,0x81,0xe5,0xa6,0x6d,0xe4] + vpunpckhqdq %ymm28, %ymm19, %ymm20 {%k6} {z} + +// CHECK: vpunpckhqdq (%rcx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x6d,0x21] + vpunpckhqdq (%rcx), %ymm19, %ymm20 + +// CHECK: vpunpckhqdq 291(%rax,%r14,8), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xa1,0xe5,0x20,0x6d,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpunpckhqdq 291(%rax,%r14,8), %ymm19, %ymm20 + +// CHECK: vpunpckhqdq (%rcx){1to4}, %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x30,0x6d,0x21] + vpunpckhqdq (%rcx){1to4}, %ymm19, %ymm20 + +// CHECK: vpunpckhqdq 4064(%rdx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x6d,0x62,0x7f] + vpunpckhqdq 4064(%rdx), %ymm19, %ymm20 + +// CHECK: vpunpckhqdq 4096(%rdx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x6d,0xa2,0x00,0x10,0x00,0x00] + vpunpckhqdq 4096(%rdx), %ymm19, %ymm20 + +// CHECK: vpunpckhqdq -4096(%rdx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x6d,0x62,0x80] + vpunpckhqdq -4096(%rdx), %ymm19, %ymm20 + +// CHECK: vpunpckhqdq -4128(%rdx), %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x20,0x6d,0xa2,0xe0,0xef,0xff,0xff] + vpunpckhqdq -4128(%rdx), %ymm19, %ymm20 + +// CHECK: vpunpckhqdq 1016(%rdx){1to4}, %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x30,0x6d,0x62,0x7f] + vpunpckhqdq 1016(%rdx){1to4}, %ymm19, %ymm20 + +// CHECK: vpunpckhqdq 1024(%rdx){1to4}, %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x30,0x6d,0xa2,0x00,0x04,0x00,0x00] + vpunpckhqdq 1024(%rdx){1to4}, %ymm19, %ymm20 + +// CHECK: vpunpckhqdq -1024(%rdx){1to4}, %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x30,0x6d,0x62,0x80] + vpunpckhqdq -1024(%rdx){1to4}, %ymm19, %ymm20 + +// CHECK: vpunpckhqdq -1032(%rdx){1to4}, %ymm19, %ymm20 +// CHECK: encoding: [0x62,0xe1,0xe5,0x30,0x6d,0xa2,0xf8,0xfb,0xff,0xff] + vpunpckhqdq -1032(%rdx){1to4}, %ymm19, %ymm20 +