Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -2133,16 +2133,16 @@ llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmultishift_qb_128: GCCBuiltin<"__builtin_ia32_vpmultishiftqb128_mask">, - Intrinsic<[llvm_v16i8_ty], [llvm_v16i8_ty, - llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + Intrinsic<[llvm_v2i64_ty], [llvm_v2i64_ty, + llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmultishift_qb_256: GCCBuiltin<"__builtin_ia32_vpmultishiftqb256_mask">, - Intrinsic<[llvm_v32i8_ty], [llvm_v32i8_ty, - llvm_v32i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + Intrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, + llvm_v4i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_mask_pmultishift_qb_512: GCCBuiltin<"__builtin_ia32_vpmultishiftqb512_mask">, - Intrinsic<[llvm_v64i8_ty], [llvm_v64i8_ty, - llvm_v64i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + Intrinsic<[llvm_v8i64_ty], [llvm_v8i64_ty, + llvm_v8i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; } // Pack ops. Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -618,6 +618,14 @@ "\t{$idx, $src1, $dst {${mask}}|" "$dst {${mask}}, $src1, $idx}", []>, EVEX_K, EVEX; + + def mrkz : AVX512AIi8, EVEX_KZ, EVEX; } // Intrinsic call with masking. 
@@ -2733,6 +2741,11 @@ OpcodeStr # "\t{$src, ${dst} {${mask}}|${dst} {${mask}}, $src}", [], _.ExeDomain>, EVEX, EVEX_K; + def mrkz : AVX512PI, EVEX, EVEX_KZ; + def: Pat<(mstore addr:$ptr, _.KRCWM:$mask, (_.VT _.RC:$src)), (!cast(NAME#_.ZSuffix##mrk) addr:$ptr, _.KRCWM:$mask, _.RC:$src)>; @@ -3798,7 +3811,7 @@ avx512vl_i32_info, avx512vl_i64_info, X86pmuludq, HasAVX512, 1>; defm VPMULTISHIFTQB : avx512_binop_all<0x83, "vpmultishiftqb", SSE_INTALU_ITINS_P, - avx512vl_i8_info, avx512vl_i8_info, + avx512vl_i64_info, avx512vl_i64_info, X86multishift, HasVBMI, 0>, T8PD; multiclass avx512_packs_rmb opc, string OpcodeStr, SDNode OpNode, @@ -6327,7 +6340,13 @@ (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), "vcvtps2ph\t{$src2, $src1, $dst {${mask}}|$dst {${mask}}, $src1, $src2}", []>, EVEX_K; + + def mrkz : AVX512AIi8<0x1D, MRMDestMem, (outs), + (ins x86memop:$dst, _dest.KRCWM:$mask, _src.RC:$src1, i32u8imm:$src2), + "vcvtps2ph\t{$src2, $src1, $dst {${mask}} {z}|$dst {${mask}} {z}, $src1, $src2}", + []>, EVEX_KZ; } + multiclass avx512_cvtps2ph_sae { let hasSideEffects = 0 in defm rb : AVX512_maskable_in_asm<0x1D, MRMDestReg, _dest, @@ -6336,6 +6355,7 @@ "vcvtps2ph", "$src2, {sae}, $src1", "$src1, {sae}, $src2", []>, EVEX_B, AVX512AIi8Base; } + let Predicates = [HasAVX512] in { defm VCVTPS2PHZ : avx512_cvtps2ph, avx512_cvtps2ph_sae, @@ -6343,7 +6363,7 @@ let Predicates = [HasVLX] in { defm VCVTPS2PHZ256 : avx512_cvtps2ph, EVEX, EVEX_V256, EVEX_CD8<32, CD8VH>; - defm VCVTPS2PHZ128 : avx512_cvtps2ph, + defm VCVTPS2PHZ128 : avx512_cvtps2ph, EVEX, EVEX_V128, EVEX_CD8<32, CD8VH>; } } @@ -6855,6 +6875,11 @@ (ins x86memop:$dst, SrcInfo.KRCWM:$mask, SrcInfo.RC:$src), OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>, EVEX, EVEX_K; + + def mrkz : AVX512XS8I, EVEX, EVEX_KZ; }//mayStore = 1, mayLoad = 1, hasSideEffects = 0 } @@ -7510,6 +7535,12 @@ OpcodeStr # "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}", []>, EVEX_K, EVEX_CD8<_.EltSize, CD8VT1>; 
+ + def mrkz : AVX5128I, + EVEX_KZ, EVEX_CD8<_.EltSize, CD8VT1>; } multiclass compress_by_vec_width_lowering { Index: test/CodeGen/X86/avx512vbmi-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vbmi-intrinsics.ll +++ test/CodeGen/X86/avx512vbmi-intrinsics.ll @@ -20,24 +20,24 @@ ret <64 x i8> %res4 } -declare <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) +declare <8 x i64> @llvm.x86.avx512.mask.pmultishift.qb.512(<8 x i64>, <8 x i64>, <8 x i64>, i8) -define <64 x i8>@test_int_x86_avx512_mask_pmultishift_qb_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) { +define <8 x i64>@test_int_x86_avx512_mask_pmultishift_qb_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmultishift_qb_512: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovq %rdi, %k1 +; CHECK-NEXT: kmovb %edi, %k1 ; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm2 {%k1} ; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm3 {%k1} {z} ; CHECK-NEXT: vpmultishiftqb %zmm1, %zmm0, %zmm0 -; CHECK-NEXT: vpaddb %zmm3, %zmm2, %zmm1 -; CHECK-NEXT: vpaddb %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1 +; CHECK-NEXT: vpaddq %zmm0, %zmm1, %zmm0 ; CHECK-NEXT: retq - %res = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) - %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> zeroinitializer, i64 %x3) - %res2 = call <64 x i8> @llvm.x86.avx512.mask.pmultishift.qb.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1) - %res3 = add <64 x i8> %res, %res1 - %res4 = add <64 x i8> %res3, %res2 - ret <64 x i8> %res4 + %res = call <8 x i64> @llvm.x86.avx512.mask.pmultishift.qb.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmultishift.qb.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3) + 
%res2 = call <8 x i64> @llvm.x86.avx512.mask.pmultishift.qb.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1) + %res3 = add <8 x i64> %res, %res1 + %res4 = add <8 x i64> %res3, %res2 + ret <8 x i64> %res4 } declare <64 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64) Index: test/CodeGen/X86/avx512vbmivl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vbmivl-intrinsics.ll +++ test/CodeGen/X86/avx512vbmivl-intrinsics.ll @@ -41,44 +41,44 @@ ret <32 x i8> %res4 } -declare <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) +declare <2 x i64> @llvm.x86.avx512.mask.pmultishift.qb.128(<2 x i64>, <2 x i64>, <2 x i64>, i8) -define <16 x i8>@test_int_x86_avx512_mask_pmultishift_qb_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) { +define <2 x i64>@test_int_x86_avx512_mask_pmultishift_qb_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmultishift_qb_128: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf] +; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x83,0xd1] ; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x83,0xd9] ; CHECK-NEXT: vpmultishiftqb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x83,0xc1] -; CHECK-NEXT: vpaddb %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xcb] -; CHECK-NEXT: vpaddb %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfc,0xc0] +; CHECK-NEXT: vpaddq %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xcb] +; CHECK-NEXT: vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> 
%x2, i16 %x3) - %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %x3) - %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmultishift.qb.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1) - %res3 = add <16 x i8> %res, %res1 - %res4 = add <16 x i8> %res3, %res2 - ret <16 x i8> %res4 + %res = call <2 x i64> @llvm.x86.avx512.mask.pmultishift.qb.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) + %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmultishift.qb.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3) + %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmultishift.qb.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1) + %res3 = add <2 x i64> %res, %res1 + %res4 = add <2 x i64> %res3, %res2 + ret <2 x i64> %res4 } -declare <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8>, <32 x i8>, <32 x i8>, i32) +declare <4 x i64> @llvm.x86.avx512.mask.pmultishift.qb.256(<4 x i64>, <4 x i64>, <4 x i64>, i8) -define <32 x i8>@test_int_x86_avx512_mask_pmultishift_qb_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) { +define <4 x i64>@test_int_x86_avx512_mask_pmultishift_qb_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) { ; CHECK-LABEL: test_int_x86_avx512_mask_pmultishift_qb_256: ; CHECK: ## BB#0: -; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: kmovb %edi, %k1 ## encoding: [0xc5,0xf9,0x92,0xcf] ; CHECK-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x83,0xd1] ; CHECK-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x83,0xd9] ; CHECK-NEXT: vpmultishiftqb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x83,0xc1] -; CHECK-NEXT: vpaddb %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xcb] -; CHECK-NEXT: vpaddb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfc,0xc0] +; CHECK-NEXT: vpaddq %ymm3, %ymm2, %ymm1 ## encoding: 
[0x62,0xf1,0xed,0x28,0xd4,0xcb] +; CHECK-NEXT: vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0] ; CHECK-NEXT: retq ## encoding: [0xc3] - %res = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) - %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> zeroinitializer, i32 %x3) - %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmultishift.qb.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1) - %res3 = add <32 x i8> %res, %res1 - %res4 = add <32 x i8> %res3, %res2 - ret <32 x i8> %res4 + %res = call <4 x i64> @llvm.x86.avx512.mask.pmultishift.qb.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) + %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmultishift.qb.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3) + %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmultishift.qb.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1) + %res3 = add <4 x i64> %res, %res1 + %res4 = add <4 x i64> %res3, %res2 + ret <4 x i64> %res4 } declare <16 x i8> @llvm.x86.avx512.mask.vpermi2var.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16) Index: test/MC/X86/avx512vbmi-encoding.s =================================================================== --- test/MC/X86/avx512vbmi-encoding.s +++ test/MC/X86/avx512vbmi-encoding.s @@ -541,3 +541,15 @@ //CHECK: vpmultishiftqb 4660(%rax,%r14,8), %zmm29, %zmm30 //CHECK: encoding: [0x62,0x22,0x95,0x40,0x83,0xb4,0xf0,0x34,0x12,0x00,0x00] + vpmultishiftqb (%rax){1to2}, %xmm2, %xmm1 +// CHECK: vpmultishiftqb (%rax){1to2}, %xmm2, %xmm1 +// CHECK: encoding: [0x62,0xf2,0xed,0x18,0x83,0x08] + + vpmultishiftqb (%rax){1to4}, %ymm2, %ymm1 +// CHECK: vpmultishiftqb (%rax){1to4}, %ymm2, %ymm1 +// CHECK: encoding: [0x62,0xf2,0xed,0x38,0x83,0x08] + + vpmultishiftqb (%rax){1to8}, %zmm2, %zmm1 +// CHECK: vpmultishiftqb (%rax){1to8}, %zmm2, %zmm1 +// CHECK: encoding: [0x62,0xf2,0xed,0x58,0x83,0x08] + Index: 
test/MC/X86/x86-64-avx512bw_vl.s =================================================================== --- test/MC/X86/x86-64-avx512bw_vl.s +++ test/MC/X86/x86-64-avx512bw_vl.s @@ -9823,3 +9823,27 @@ // CHECK: encoding: [0x62,0xa1,0x7f,0xae,0x7f,0xd9] vmovdqu8.s %ymm19, %ymm17 {%k6} {z} +// CHECK: vmovdqu8 %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7f,0x89,0x7f,0x08] + vmovdqu8 %xmm1, (%rax) {%k1} {z} + +// CHECK: vmovdqu8 %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7f,0xa9,0x7f,0x08] + vmovdqu8 %ymm1, (%rax) {%k1} {z} + +// CHECK: vmovdqu8 %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7f,0xc9,0x7f,0x08] + vmovdqu8 %zmm1, (%rax) {%k1} {z} + +// CHECK: vmovdqu16 %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xff,0x89,0x7f,0x08] + vmovdqu16 %xmm1, (%rax) {%k1} {z} + +// CHECK: vmovdqu16 %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xff,0xa9,0x7f,0x08] + vmovdqu16 %ymm1, (%rax) {%k1} {z} + +// CHECK: vmovdqu16 %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xff,0xc9,0x7f,0x08] + vmovdqu16 %zmm1, (%rax) {%k1} {z} + Index: test/MC/X86/x86-64-avx512dq.s =================================================================== --- test/MC/X86/x86-64-avx512dq.s +++ test/MC/X86/x86-64-avx512dq.s @@ -4136,3 +4136,19 @@ // CHECK: encoding: [0x62,0x62,0x7d,0x48,0x19,0x9a,0xf8,0xfb,0xff,0xff] vbroadcastf32x2 -1032(%rdx), %zmm27 +// CHECK: vextractf32x8 $0, %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0x7d,0xc9,0x1b,0x08,0x00] + vextractf32x8 $0, %zmm1, (%rax) {%k1} {z} + +// CHECK: vextractf64x2 $0, %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0xfd,0xc9,0x19,0x08,0x00] + vextractf64x2 $0, %zmm1, (%rax) {%k1} {z} + +// CHECK: vextracti64x2 $0, %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0xfd,0xc9,0x39,0x08,0x00] + vextracti64x2 $0, %zmm1, (%rax) {%k1} {z} + +// CHECK: vextracti32x8 $0, %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0x7d,0xc9,0x3b,0x08,0x00] + vextracti32x8 
$0, %zmm1, (%rax) {%k1} {z} + Index: test/MC/X86/x86-64-avx512dq_vl.s =================================================================== --- test/MC/X86/x86-64-avx512dq_vl.s +++ test/MC/X86/x86-64-avx512dq_vl.s @@ -4824,3 +4824,11 @@ // CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x19,0x9a,0xf8,0xfb,0xff,0xff] vbroadcastf32x2 -1032(%rdx), %ymm19 +// CHECK: vextractf64x2 $0, %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0xfd,0xa9,0x19,0x08,0x00] + vextractf64x2 $0, %ymm1, (%rax) {%k1} {z} + +// CHECK: vextracti64x2 $0, %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0xfd,0xa9,0x39,0x08,0x00] + vextracti64x2 $0, %ymm1, (%rax) {%k1} {z} + Index: test/MC/X86/x86-64-avx512f_vl.s =================================================================== --- test/MC/X86/x86-64-avx512f_vl.s +++ test/MC/X86/x86-64-avx512f_vl.s @@ -22963,3 +22963,310 @@ // CHECK: encoding: [0x62,0xa1,0x7c,0xa9,0x11,0xde] vmovups.s %ymm19, %ymm22 {%k1} {z} +// CHECK: vpcompressd %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7d,0xc9,0x8b,0x08] + vpcompressd %zmm1, (%rax) {%k1} {z} + +// CHECK: vpcompressd %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7d,0xa9,0x8b,0x08] + vpcompressd %ymm1, (%rax) {%k1} {z} + +// CHECK: vpcompressd %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7d,0x89,0x8b,0x08] + vpcompressd %xmm1, (%rax) {%k1} {z} + +// CHECK: vpcompressq %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0xfd,0xc9,0x8b,0x08] + vpcompressq %zmm1, (%rax) {%k1} {z} + +// CHECK: vpcompressq %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0xfd,0xa9,0x8b,0x08] + vpcompressq %ymm1, (%rax) {%k1} {z} + +// CHECK: vpcompressq %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0xfd,0x89,0x8b,0x08] + vpcompressq %xmm1, (%rax) {%k1} {z} + +// CHECK: vcompressps %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7d,0xc9,0x8a,0x08] + vcompressps %zmm1, (%rax) {%k1} {z} + +// CHECK: vcompressps %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: 
[0x62,0xf2,0x7d,0xa9,0x8a,0x08] + vcompressps %ymm1, (%rax) {%k1} {z} + +// CHECK: vcompressps %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7d,0x89,0x8a,0x08] + vcompressps %xmm1, (%rax) {%k1} {z} + +// CHECK: vcompresspd %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0xfd,0xc9,0x8a,0x08] + vcompresspd %zmm1, (%rax) {%k1} {z} + +// CHECK: vcompresspd %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0xfd,0xa9,0x8a,0x08] + vcompresspd %ymm1, (%rax) {%k1} {z} + +// CHECK: vcompresspd %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0xfd,0x89,0x8a,0x08] + vcompresspd %xmm1, (%rax) {%k1} {z} + +// CHECK: vcvtps2ph $0, %xmm2, (%rax) {%k1} +// CHECK: encoding: [0x62,0xf3,0x7d,0x09,0x1d,0x10,0x00] + vcvtps2ph $0, %xmm2, (%rax) {%k1} + +// CHECK: vcvtps2ph $2, %xmm2, (%rcx) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0x7d,0x89,0x1d,0x11,0x02] + vcvtps2ph $2, %xmm2, (%rcx) {%k1} {z} + +// CHECK: vcvtps2ph $2, %ymm2, (%rcx) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0x7d,0xa9,0x1d,0x11,0x02] + vcvtps2ph $2, %ymm2, (%rcx) {%k1} {z} + +// CHECK: vcvtps2ph $2, %zmm2, (%rcx) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0x7d,0xc9,0x1d,0x11,0x02] + vcvtps2ph $2, %zmm2, (%rcx) {%k1} {z} + +// CHECK: vextractf32x4 $0, %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0x7d,0xa9,0x19,0x08,0x00] + vextractf32x4 $0, %ymm1, (%rax) {%k1} {z} + +// CHECK: vextractf32x4 $0, %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0x7d,0xc9,0x19,0x08,0x00] + vextractf32x4 $0, %zmm1, (%rax) {%k1} {z} + +// CHECK: vextractf64x4 $0, %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0xfd,0xc9,0x1b,0x08,0x00] + vextractf64x4 $0, %zmm1, (%rax) {%k1} {z} + +// CHECK: vextracti32x4 $0, %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0x7d,0xa9,0x39,0x08,0x00] + vextracti32x4 $0, %ymm1, (%rax) {%k1} {z} + +// CHECK: vextracti32x4 $0, %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0x7d,0xc9,0x39,0x08,0x00] + vextracti32x4 $0, %zmm1, (%rax) {%k1} 
{z} + +// CHECK: vextracti64x4 $0, %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0xfd,0xc9,0x3b,0x08,0x00] + vextracti64x4 $0, %zmm1, (%rax) {%k1} {z} + +// CHECK: vmovdqa32 %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7d,0x89,0x7f,0x08] + vmovdqa32 %xmm1, (%rax) {%k1} {z} + +// CHECK: vmovdqa32 %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7d,0xa9,0x7f,0x08] + vmovdqa32 %ymm1, (%rax) {%k1} {z} + +// CHECK: vmovdqa32 %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7d,0xc9,0x7f,0x08] + vmovdqa32 %zmm1, (%rax) {%k1} {z} + +// CHECK: vmovdqa64 %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xfd,0x89,0x7f,0x08] + vmovdqa64 %xmm1, (%rax) {%k1} {z} + +// CHECK: vmovdqa64 %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xfd,0xa9,0x7f,0x08] + vmovdqa64 %ymm1, (%rax) {%k1} {z} + +// CHECK: vmovdqa64 %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xfd,0xc9,0x7f,0x08] + vmovdqa64 %zmm1, (%rax) {%k1} {z} + +// CHECK: vmovdqu32 %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7e,0x89,0x7f,0x08] + vmovdqu32 %xmm1, (%rax) {%k1} {z} + +// CHECK: vmovdqu32 %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7e,0xa9,0x7f,0x08] + vmovdqu32 %ymm1, (%rax) {%k1} {z} + +// CHECK: vmovdqu32 %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7e,0xc9,0x7f,0x08] + vmovdqu32 %zmm1, (%rax) {%k1} {z} + +// CHECK: vmovdqu64 %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xfe,0x89,0x7f,0x08] + vmovdqu64 %xmm1, (%rax) {%k1} {z} + +// CHECK: vmovdqu64 %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xfe,0xa9,0x7f,0x08] + vmovdqu64 %ymm1, (%rax) {%k1} {z} + +// CHECK: vmovdqu64 %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xfe,0xc9,0x7f,0x08] + vmovdqu64 %zmm1, (%rax) {%k1} {z} + +// CHECK: vmovupd %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xfd,0x89,0x11,0x08] + vmovupd %xmm1, (%rax) {%k1} {z} + +// CHECK: vmovupd %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: 
[0x62,0xf1,0xfd,0xa9,0x11,0x08] + vmovupd %ymm1, (%rax) {%k1} {z} + +// CHECK: vmovupd %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xfd,0xc9,0x11,0x08] + vmovupd %zmm1, (%rax) {%k1} {z} +// CHECK: vmovapd %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xfd,0x89,0x29,0x08] + vmovapd %xmm1, (%rax) {%k1} {z} + +// CHECK: vmovapd %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xfd,0xa9,0x29,0x08] + vmovapd %ymm1, (%rax) {%k1} {z} + +// CHECK: vmovapd %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0xfd,0xc9,0x29,0x08] + vmovapd %zmm1, (%rax) {%k1} {z} +// CHECK: vmovups %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7c,0x89,0x11,0x08] + vmovups %xmm1, (%rax) {%k1} {z} + +// CHECK: vmovups %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7c,0xa9,0x11,0x08] + vmovups %ymm1, (%rax) {%k1} {z} + +// CHECK: vmovups %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7c,0xc9,0x11,0x08] + vmovups %zmm1, (%rax) {%k1} {z} +// CHECK: vmovaps %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7c,0x89,0x29,0x08] + vmovaps %xmm1, (%rax) {%k1} {z} + +// CHECK: vmovaps %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7c,0xa9,0x29,0x08] + vmovaps %ymm1, (%rax) {%k1} {z} + +// CHECK: vmovaps %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf1,0x7c,0xc9,0x29,0x08] + vmovaps %zmm1, (%rax) {%k1} {z} +// CHECK: vpmovsqb %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0x89,0x22,0x08] + vpmovsqb %xmm1, (%rax) {%k1} {z} + +// CHECK: vpmovsqb %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xa9,0x22,0x08] + vpmovsqb %ymm1, (%rax) {%k1} {z} + +// CHECK: vpmovsqb %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xc9,0x22,0x08] + vpmovsqb %zmm1, (%rax) {%k1} {z} +// CHECK: vpmovusqb %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0x89,0x12,0x08] + vpmovusqb %xmm1, (%rax) {%k1} {z} + +// CHECK: vpmovusqb %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: 
[0x62,0xf2,0x7e,0xa9,0x12,0x08] + vpmovusqb %ymm1, (%rax) {%k1} {z} + +// CHECK: vpmovusqb %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xc9,0x12,0x08] + vpmovusqb %zmm1, (%rax) {%k1} {z} +// CHECK: vpmovqb %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0x89,0x32,0x08] + vpmovqb %xmm1, (%rax) {%k1} {z} + +// CHECK: vpmovqb %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xa9,0x32,0x08] + vpmovqb %ymm1, (%rax) {%k1} {z} + +// CHECK: vpmovqb %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xc9,0x32,0x08] + vpmovqb %zmm1, (%rax) {%k1} {z} +// CHECK: vpmovqw %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0x89,0x34,0x08] + vpmovqw %xmm1, (%rax) {%k1} {z} + +// CHECK: vpmovqw %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xa9,0x34,0x08] + vpmovqw %ymm1, (%rax) {%k1} {z} + +// CHECK: vpmovqw %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xc9,0x34,0x08] + vpmovqw %zmm1, (%rax) {%k1} {z} +// CHECK: vpmovsqw %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0x89,0x24,0x08] + vpmovsqw %xmm1, (%rax) {%k1} {z} + +// CHECK: vpmovsqw %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xa9,0x24,0x08] + vpmovsqw %ymm1, (%rax) {%k1} {z} + +// CHECK: vpmovsqw %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xc9,0x24,0x08] + vpmovsqw %zmm1, (%rax) {%k1} {z} +// CHECK: vpmovusqw %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0x89,0x14,0x08] + vpmovusqw %xmm1, (%rax) {%k1} {z} + +// CHECK: vpmovusqw %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xa9,0x14,0x08] + vpmovusqw %ymm1, (%rax) {%k1} {z} + +// CHECK: vpmovusqw %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xc9,0x14,0x08] + vpmovusqw %zmm1, (%rax) {%k1} {z} + +// CHECK: vpmovusqd %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0x89,0x15,0x08] + vpmovusqd %xmm1, (%rax) {%k1} {z} + +// CHECK: vpmovusqd %ymm1, (%rax) {%k1} {z} +// CHECK: 
encoding: [0x62,0xf2,0x7e,0xa9,0x15,0x08] + vpmovusqd %ymm1, (%rax) {%k1} {z} + +// CHECK: vpmovusqd %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xc9,0x15,0x08] + vpmovusqd %zmm1, (%rax) {%k1} {z} + +// CHECK: vpmovsqd %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0x89,0x25,0x08] + vpmovsqd %xmm1, (%rax) {%k1} {z} + +// CHECK: vpmovsqd %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xa9,0x25,0x08] + vpmovsqd %ymm1, (%rax) {%k1} {z} + +// CHECK: vpmovsqd %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xc9,0x25,0x08] + vpmovsqd %zmm1, (%rax) {%k1} {z} + +// CHECK: vpmovdb %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0x89,0x31,0x08] + vpmovdb %xmm1, (%rax) {%k1} {z} + +// CHECK: vpmovdb %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xa9,0x31,0x08] + vpmovdb %ymm1, (%rax) {%k1} {z} + +// CHECK: vpmovdb %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xc9,0x31,0x08] + vpmovdb %zmm1, (%rax) {%k1} {z} + +// CHECK: vpmovsdb %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0x89,0x21,0x08] + vpmovsdb %xmm1, (%rax) {%k1} {z} + +// CHECK: vpmovsdb %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xa9,0x21,0x08] + vpmovsdb %ymm1, (%rax) {%k1} {z} + +// CHECK: vpmovsdb %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xc9,0x21,0x08] + vpmovsdb %zmm1, (%rax) {%k1} {z} + +// CHECK: vpmovusdb %xmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0x89,0x11,0x08] + vpmovusdb %xmm1, (%rax) {%k1} {z} + +// CHECK: vpmovusdb %ymm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xa9,0x11,0x08] + vpmovusdb %ymm1, (%rax) {%k1} {z} + +// CHECK: vpmovusdb %zmm1, (%rax) {%k1} {z} +// CHECK: encoding: [0x62,0xf2,0x7e,0xc9,0x11,0x08] + vpmovusdb %zmm1, (%rax) {%k1} {z} +