Index: include/llvm/IR/IntrinsicsX86.td =================================================================== --- include/llvm/IR/IntrinsicsX86.td +++ include/llvm/IR/IntrinsicsX86.td @@ -2279,6 +2279,78 @@ GCCBuiltin<"__builtin_ia32_extracti64x4_mask">, Intrinsic<[llvm_v4i64_ty], [llvm_v8i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + + def int_x86_avx512_mask_insertf32x4_256 : + GCCBuiltin<"__builtin_ia32_insertf32x4_256_mask">, + Intrinsic<[llvm_v8f32_ty], + [llvm_v8f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v8f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf32x4_512 : + GCCBuiltin<"__builtin_ia32_insertf32x4_512_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf32x8_512 : + GCCBuiltin<"__builtin_ia32_insertf32x8_mask">, + Intrinsic<[llvm_v16f32_ty], + [llvm_v16f32_ty, llvm_v8f32_ty, llvm_i32_ty, llvm_v16f32_ty, llvm_i16_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf64x2_256 : + GCCBuiltin<"__builtin_ia32_insertf64x2_256_mask">, + Intrinsic<[llvm_v4f64_ty], + [llvm_v4f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v4f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf64x2_512 : + GCCBuiltin<"__builtin_ia32_insertf64x2_512_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_insertf64x4_512 : + GCCBuiltin<"__builtin_ia32_insertf64x4_mask">, + Intrinsic<[llvm_v8f64_ty], + [llvm_v8f64_ty, llvm_v4f64_ty, llvm_i32_ty, llvm_v8f64_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti32x4_256 : + GCCBuiltin<"__builtin_ia32_inserti32x4_256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v8i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v8i32_ty, llvm_i8_ty], + [IntrNoMem]>; + + def int_x86_avx512_mask_inserti32x4_512 : + GCCBuiltin<"__builtin_ia32_inserti32x4_512_mask">, + 
Intrinsic<[llvm_v16i32_ty],
+          [llvm_v16i32_ty, llvm_v4i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_inserti32x8_512 :
+        GCCBuiltin<"__builtin_ia32_inserti32x8_mask">,
+          Intrinsic<[llvm_v16i32_ty],
+          [llvm_v16i32_ty, llvm_v8i32_ty, llvm_i32_ty, llvm_v16i32_ty, llvm_i16_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_inserti64x2_256 :
+        GCCBuiltin<"__builtin_ia32_inserti64x2_256_mask">,
+          Intrinsic<[llvm_v4i64_ty],
+          [llvm_v4i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v4i64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_inserti64x2_512 :
+        GCCBuiltin<"__builtin_ia32_inserti64x2_512_mask">,
+          Intrinsic<[llvm_v8i64_ty],
+          [llvm_v8i64_ty, llvm_v2i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
+
+  def int_x86_avx512_mask_inserti64x4_512 :
+        GCCBuiltin<"__builtin_ia32_inserti64x4_mask">,
+          Intrinsic<[llvm_v8i64_ty],
+          [llvm_v8i64_ty, llvm_v4i64_ty, llvm_i32_ty, llvm_v8i64_ty, llvm_i8_ty],
+          [IntrNoMem]>;
 }
 
 // Conditional load ops
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -15949,7 +15949,8 @@
                                               Mask, PassThru, Subtarget, DAG);
     }
     case INTR_TYPE_3OP_IMM8_MASK:
-    case INTR_TYPE_3OP_MASK: {
+    case INTR_TYPE_3OP_MASK:
+    case INSERT_SUBVEC: {
       SDValue Src1 = Op.getOperand(1);
       SDValue Src2 = Op.getOperand(2);
       SDValue Src3 = Op.getOperand(3);
@@ -15958,6 +15959,20 @@
 
       if (IntrData->Type == INTR_TYPE_3OP_IMM8_MASK)
         Src3 = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Src3);
+      else if (IntrData->Type == INSERT_SUBVEC) {
+        // The intrinsic's immediate names a subvector-sized slot of Src1,
+        // whereas ISD::INSERT_SUBVECTOR expects an element index, so the
+        // slot number is scaled by the element count of the subvector Src2.
+        assert(isa<ConstantSDNode>(Src3) && "Expected a ConstantSDNode here!");
+        unsigned SubElts = Src2.getValueType().getVectorNumElements();
+        unsigned NumSlots = Src1.getValueType().getVectorNumElements() / SubElts;
+        unsigned Imm = cast<ConstantSDNode>(Src3)->getZExtValue();
+        // Reduce the immediate modulo the slot count so the scaled index can
+        // never point past the end of the destination vector.
+        Imm %= NumSlots;
+        Src3 = DAG.getTargetConstant(Imm * SubElts, dl, MVT::i32);
+      }
+
       // We specify 2 possible opcodes for intrinsics with rounding modes.
// First, we check if the intrinsic may have non-default rounding mode, // (IntrData->Opc1 != 0), then we check the rounding mode operand. Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -471,84 +471,123 @@ //===----------------------------------------------------------------------===// // AVX-512 - VECTOR INSERT // - -multiclass vinsert_for_size_no_alt { +multiclass vinsert_for_size { let hasSideEffects = 0, ExeDomain = To.ExeDomain in { - def rr : AVX512AIi8, - EVEX_4V, EVEX_V512; + defm rr : AVX512_maskable, AVX512AIi8Base, EVEX_4V; - let mayLoad = 1 in - def rm : AVX512AIi8, - EVEX_4V, EVEX_V512, EVEX_CD8; + let mayLoad = 1 in + defm rm : AVX512_maskable, AVX512AIi8Base, EVEX_4V, + EVEX_CD8; } } -multiclass vinsert_for_size : - vinsert_for_size_no_alt { - // Codegen pattern with the alternative types, e.g. v2i64 -> v8i64 for - // vinserti32x4. Only add this if 64x2 and friends are not supported - // natively via AVX512DQ. 
- let Predicates = [NoDQI] in +multiclass vinsert_for_size_lowering p> { + let Predicates = p in { def : Pat<(vinsert_insert:$ins - (AltTo.VT VR512:$src1), (AltFrom.VT From.RC:$src2), (iPTR imm)), - (AltTo.VT (!cast(NAME # From.EltSize # "x4rr") - VR512:$src1, From.RC:$src2, - (INSERT_get_vinsert_imm VR512:$ins)))>; + (To.VT To.RC:$src1), (From.VT From.RC:$src2), (iPTR imm)), + (To.VT (!cast(InstrStr#"rr") + To.RC:$src1, From.RC:$src2, + (INSERT_get_vinsert_imm To.RC:$ins)))>; + + def : Pat<(vinsert_insert:$ins + (To.VT To.RC:$src1), + (From.VT (bitconvert (From.LdFrag addr:$src2))), + (iPTR imm)), + (To.VT (!cast(InstrStr#"rm") + To.RC:$src1, addr:$src2, + (INSERT_get_vinsert_imm To.RC:$ins)))>; + } } multiclass vinsert_for_type { - defm NAME # "32x4" : vinsert_for_size, + X86VectorVTInfo< 8, EltVT32, VR256X>, + vinsert128_insert>, EVEX_V256; + + defm NAME # "32x4Z" : vinsert_for_size, X86VectorVTInfo<16, EltVT32, VR512>, - X86VectorVTInfo< 2, EltVT64, VR128X>, + vinsert128_insert>, EVEX_V512; + + defm NAME # "64x4Z" : vinsert_for_size, X86VectorVTInfo< 8, EltVT64, VR512>, - vinsert128_insert, - INSERT_get_vinsert128_imm>; - let Predicates = [HasDQI] in - defm NAME # "64x2" : vinsert_for_size_no_alt, VEX_W, EVEX_V512; + + let Predicates = [HasVLX, HasDQI] in + defm NAME # "64x2Z256" : vinsert_for_size, + X86VectorVTInfo< 4, EltVT64, VR256X>, + vinsert128_insert>, VEX_W, EVEX_V256; + + let Predicates = [HasDQI] in { + defm NAME # "64x2Z" : vinsert_for_size, X86VectorVTInfo< 8, EltVT64, VR512>, - vinsert128_insert, - INSERT_get_vinsert128_imm>, VEX_W; - defm NAME # "64x4" : vinsert_for_size, - X86VectorVTInfo< 8, EltVT64, VR512>, - X86VectorVTInfo< 8, EltVT32, VR256>, - X86VectorVTInfo<16, EltVT32, VR512>, - vinsert256_insert, - INSERT_get_vinsert256_imm>, VEX_W; - let Predicates = [HasDQI] in - defm NAME # "32x8" : vinsert_for_size_no_alt, - X86VectorVTInfo<16, EltVT32, VR512>, - vinsert256_insert, - INSERT_get_vinsert256_imm>; + vinsert128_insert>, VEX_W, 
EVEX_V512; + + defm NAME # "32x8Z" : vinsert_for_size, + X86VectorVTInfo<16, EltVT32, VR512>, + vinsert256_insert>, EVEX_V512; + } } defm VINSERTF : vinsert_for_type; defm VINSERTI : vinsert_for_type; +// Codegen pattern with the alternative types, +// Only add this if 64x2 and its friends are not supported natively via AVX512DQ. +defm : vinsert_for_size_lowering<"VINSERTF32x4Z256", v2f64x_info, v4f64x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v2i64x_info, v4i64x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX, NoDQI]>; + +defm : vinsert_for_size_lowering<"VINSERTF32x4Z", v2f64x_info, v8f64_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v2i64x_info, v8i64_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512, NoDQI]>; + +defm : vinsert_for_size_lowering<"VINSERTF64x4Z", v8f32x_info, v16f32_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>; +defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v8i32x_info, v16i32_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512, NoDQI]>; + +// Codegen pattern with the alternative types insert VEC128 into VEC256 +defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v8i16x_info, v16i16x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z256", v16i8x_info, v32i8x_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasVLX]>; +// Codegen pattern with the alternative types insert VEC128 into VEC512 +defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v8i16x_info, v32i16_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; +defm : vinsert_for_size_lowering<"VINSERTI32x4Z", v16i8x_info, v64i8_info, + vinsert128_insert, INSERT_get_vinsert128_imm, [HasAVX512]>; +// Codegen pattern with the alternative types insert VEC256 into VEC512 +defm : 
vinsert_for_size_lowering<"VINSERTI64x4Z", v16i16x_info, v32i16_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; +defm : vinsert_for_size_lowering<"VINSERTI64x4Z", v32i8x_info, v64i8_info, + vinsert256_insert, INSERT_get_vinsert256_imm, [HasAVX512]>; + // vinsertps - insert f32 to XMM def VINSERTPSzrr : AVX512AIi8<0x21, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, VR128X:$src2, u8imm:$src3), Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -7898,7 +7898,7 @@ []>, Sched<[WriteFShuffleLd, ReadAfterLd]>, VEX_4V, VEX_L; } -let Predicates = [HasAVX] in { +let Predicates = [HasAVX, NoVLX] in { def : Pat<(vinsert128_insert:$ins (v8f32 VR256:$src1), (v4f32 VR128:$src2), (iPTR imm)), (VINSERTF128rr VR256:$src1, VR128:$src2, @@ -8560,7 +8560,7 @@ []>, Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L; } -let Predicates = [HasAVX2] in { +let Predicates = [HasAVX2, NoVLX] in { def : Pat<(vinsert128_insert:$ins (v4i64 VR256:$src1), (v2i64 VR128:$src2), (iPTR imm)), (VINSERTI128rr VR256:$src1, VR128:$src2, Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -29,7 +29,7 @@ INTR_TYPE_SCALAR_MASK_RM, INTR_TYPE_3OP_SCALAR_MASK_RM, COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, - EXPAND_FROM_MEM, BLEND + EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC }; struct IntrinsicData { @@ -697,6 +697,30 @@ X86ISD::VGETMANT, 0), X86_INTRINSIC_DATA(avx512_mask_getmant_ss, INTR_TYPE_3OP_SCALAR_MASK_RM, X86ISD::VGETMANT, 0), + X86_INTRINSIC_DATA(avx512_mask_insertf32x4_256, INSERT_SUBVEC, + ISD::INSERT_SUBVECTOR, 0), + X86_INTRINSIC_DATA(avx512_mask_insertf32x4_512, INSERT_SUBVEC, + ISD::INSERT_SUBVECTOR, 0), + X86_INTRINSIC_DATA(avx512_mask_insertf32x8_512, 
INSERT_SUBVEC, + ISD::INSERT_SUBVECTOR, 0), + X86_INTRINSIC_DATA(avx512_mask_insertf64x2_256, INSERT_SUBVEC, + ISD::INSERT_SUBVECTOR, 0), + X86_INTRINSIC_DATA(avx512_mask_insertf64x2_512, INSERT_SUBVEC, + ISD::INSERT_SUBVECTOR, 0), + X86_INTRINSIC_DATA(avx512_mask_insertf64x4_512, INSERT_SUBVEC, + ISD::INSERT_SUBVECTOR, 0), + X86_INTRINSIC_DATA(avx512_mask_inserti32x4_256, INSERT_SUBVEC, + ISD::INSERT_SUBVECTOR, 0), + X86_INTRINSIC_DATA(avx512_mask_inserti32x4_512, INSERT_SUBVEC, + ISD::INSERT_SUBVECTOR, 0), + X86_INTRINSIC_DATA(avx512_mask_inserti32x8_512, INSERT_SUBVEC, + ISD::INSERT_SUBVECTOR, 0), + X86_INTRINSIC_DATA(avx512_mask_inserti64x2_256, INSERT_SUBVEC, + ISD::INSERT_SUBVECTOR, 0), + X86_INTRINSIC_DATA(avx512_mask_inserti64x2_512, INSERT_SUBVEC, + ISD::INSERT_SUBVECTOR, 0), + X86_INTRINSIC_DATA(avx512_mask_inserti64x4_512, INSERT_SUBVEC, + ISD::INSERT_SUBVECTOR, 0), X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_128, INTR_TYPE_1OP_MASK, ISD::CTLZ, 0), X86_INTRINSIC_DATA(avx512_mask_lzcnt_d_256, INTR_TYPE_1OP_MASK, Index: test/CodeGen/X86/avx512-insert-extract.ll =================================================================== --- test/CodeGen/X86/avx512-insert-extract.ll +++ test/CodeGen/X86/avx512-insert-extract.ll @@ -231,3 +231,102 @@ ret i8 %x2 } +define <8 x i64> @test_insert_128_v8i64(<8 x i64> %x, i64 %y) { +; KNL-LABEL: test_insert_128_v8i64: +; KNL: ## BB#0: +; KNL-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1 +; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test_insert_128_v8i64: +; SKX: ## BB#0: +; SKX-NEXT: vpinsrq $1, %rdi, %xmm0, %xmm1 +; SKX-NEXT: vinserti64x2 $0, %xmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %r = insertelement <8 x i64> %x, i64 %y, i32 1 + ret <8 x i64> %r +} + +define <16 x i32> @test_insert_128_v16i32(<16 x i32> %x, i32 %y) { +; KNL-LABEL: test_insert_128_v16i32: +; KNL: ## BB#0: +; KNL-NEXT: vpinsrd $1, %edi, %xmm0, %xmm1 +; KNL-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; 
SKX-LABEL: test_insert_128_v16i32: +; SKX: ## BB#0: +; SKX-NEXT: vpinsrd $1, %edi, %xmm0, %xmm1 +; SKX-NEXT: vinserti32x4 $0, %xmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %r = insertelement <16 x i32> %x, i32 %y, i32 1 + ret <16 x i32> %r +} + +define <8 x double> @test_insert_128_v8f64(<8 x double> %x, double %y) { +; KNL-LABEL: test_insert_128_v8f64: +; KNL: ## BB#0: +; KNL-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm0[0],xmm1[0] +; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test_insert_128_v8f64: +; SKX: ## BB#0: +; SKX-NEXT: vunpcklpd %xmm1, %xmm0, %xmm1 +; SKX-NEXT: vinsertf64x2 $0, %xmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %r = insertelement <8 x double> %x, double %y, i32 1 + ret <8 x double> %r +} + +define <16 x float> @test_insert_128_v16f32(<16 x float> %x, float %y) { +; KNL-LABEL: test_insert_128_v16f32: +; KNL: ## BB#0: +; KNL-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm1 +; KNL-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test_insert_128_v16f32: +; SKX: ## BB#0: +; SKX-NEXT: vinsertps $16, %xmm1, %xmm0, %xmm1 +; SKX-NEXT: vinsertf32x4 $0, %xmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %r = insertelement <16 x float> %x, float %y, i32 1 + ret <16 x float> %r +} + +define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) { +; KNL-LABEL: test_insert_128_v16i16: +; KNL: ## BB#0: +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; KNL-NEXT: vpinsrw $2, %edi, %xmm1, %xmm1 +; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test_insert_128_v16i16: +; SKX: ## BB#0: +; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1 +; SKX-NEXT: vpinsrw $2, %edi, %xmm1, %xmm1 +; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 +; SKX-NEXT: retq + %r = insertelement <16 x i16> %x, i16 %y, i32 10 + ret <16 x i16> %r +} + +define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) { +; KNL-LABEL: test_insert_128_v32i8: +; KNL: ## BB#0: +; KNL-NEXT: vextracti128 $1, %ymm0, %xmm1 +; KNL-NEXT: vpinsrb $4, %edi, 
%xmm1, %xmm1 +; KNL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; KNL-NEXT: retq +; +; SKX-LABEL: test_insert_128_v32i8: +; SKX: ## BB#0: +; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1 +; SKX-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1 +; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 +; SKX-NEXT: retq + %r = insertelement <32 x i8> %x, i8 %y, i32 20 + ret <32 x i8> %r +} Index: test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -4080,3 +4080,86 @@ ret <16 x float> %res2 } +declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i8) + +define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: vaddps %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 %x4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i8 -1) + %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i8 %x4) + %res3 = fadd <16 x float> %res, %res1 + %res4 = fadd <16 x float> %res2, %res3 + ret <16 x float> %res4 +} + +declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i8) + +define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i8 %x4) { +; CHECK-LABEL: 
test_int_x86_avx512_mask_inserti32x4_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vinserti32x4 $1, %xmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: vpaddd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 %x4) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i8 -1) + %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i8 %x4) + %res3 = add <16 x i32> %res, %res1 + %res4 = add <16 x i32> %res2, %res3 + ret <16 x i32> %res4 +} + +declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x4_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1) + %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4) + %res3 = fadd <8 x double> %res, %res1 + %res4 = fadd <8 x double> 
%res2, %res3 + ret <8 x double> %res4 +} + +declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8) + +define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x4_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: vpaddq %zmm0, %zmm3, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1) + %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4) + %res3 = add <8 x i64> %res, %res1 + %res4 = add <8 x i64> %res2, %res3 + ret <8 x i64> %res4 +} + + Index: test/CodeGen/X86/avx512dq-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512dq-intrinsics.ll +++ test/CodeGen/X86/avx512dq-intrinsics.ll @@ -356,3 +356,83 @@ %res4 = fadd <8 x float> %res2, %res3 ret <8 x float> %res4 } + +declare <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float>, <8 x float>, i32, <16 x float>, i16) + +define <16 x float>@test_int_x86_avx512_mask_insertf32x8_512(<16 x float> %x0, <8 x float> %x1, <16 x float> %x3, i16 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x8_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vinsertf32x8 $1, %ymm1, %zmm0, %zmm0 +; CHECK-NEXT: vaddps %zmm3, %zmm2, 
%zmm1 +; CHECK-NEXT: vaddps %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 %x4) + %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4) + %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x8.512(<16 x float> %x0, <8 x float> %x1, i32 1, <16 x float> %x3, i16 -1) + %res3 = fadd <16 x float> %res, %res1 + %res4 = fadd <16 x float> %res2, %res3 + ret <16 x float> %res4 +} + +declare <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double>, <2 x double>, i32, <8 x double>, i8) + +define <8 x double>@test_int_x86_avx512_mask_insertf64x2_512(<8 x double> %x0, <2 x double> %x1,<8 x double> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %zmm0, %zmm0 +; CHECK-NEXT: vaddpd %zmm3, %zmm2, %zmm1 +; CHECK-NEXT: vaddpd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 %x4) + %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4) + %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x2.512(<8 x double> %x0, <2 x double> %x1, i32 1, <8 x double> %x3, i8 -1) + %res3 = fadd <8 x double> %res, %res1 + %res4 = fadd <8 x double> %res3, %res2 + ret <8 x double> %res4 +} + +declare <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32>, <8 x i32>, i32, <16 x i32>, i16) + +define <16 x i32>@test_int_x86_avx512_mask_inserti32x8_512(<16 x i32> %x0, <8 x i32> %x1, <16 x i32> %x3, i16 %x4) { +; CHECK-LABEL: 
test_int_x86_avx512_mask_inserti32x8_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vinserti32x8 $1, %ymm1, %zmm0, %zmm0 +; CHECK-NEXT: vpaddd %zmm3, %zmm2, %zmm1 +; CHECK-NEXT: vpaddd %zmm0, %zmm1, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4) + %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4) + %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x8.512(<16 x i32> %x0, <8 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1) + %res3 = add <16 x i32> %res, %res1 + %res4 = add <16 x i32> %res3, %res2 + ret <16 x i32> %res4 +} + +declare <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64>, <2 x i64>, i32, <8 x i64>, i8) + +define <8 x i64>@test_int_x86_avx512_mask_inserti64x2_512(<8 x i64> %x0, <2 x i64> %x1, <8 x i64> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm2 {%k1} +; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm3 {%k1} {z} +; CHECK-NEXT: vinserti64x2 $1, %xmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpaddq %zmm3, %zmm2, %zmm1 +; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4) + %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4) + %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x2.512(<8 x i64> %x0, <2 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1) + %res3 = add <8 x i64> %res, %res1 + %res4 = add <8 x i64> %res2, %res3 + ret <8 x i64> %res4 +} Index: test/CodeGen/X86/avx512dqvl-intrinsics.ll 
=================================================================== --- test/CodeGen/X86/avx512dqvl-intrinsics.ll +++ test/CodeGen/X86/avx512dqvl-intrinsics.ll @@ -1668,3 +1668,43 @@ %res4 = fadd <2 x double> %res3, %res2 ret <2 x double> %res4 } + +declare <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double>, <2 x double>, i32, <4 x double>, i8) + +define <4 x double>@test_int_x86_avx512_mask_insertf64x2_256(<4 x double> %x0, <2 x double> %x1, <4 x double> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_insertf64x2_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} +; CHECK-NEXT: vinsertf64x2 $1, %xmm1, %ymm0, %ymm0 +; CHECK-NEXT: vaddpd %ymm0, %ymm2, %ymm0 +; CHECK-NEXT: vaddpd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 %x4) + %res1 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> %x3, i8 -1) + %res2 = call <4 x double> @llvm.x86.avx512.mask.insertf64x2.256(<4 x double> %x0, <2 x double> %x1, i32 1, <4 x double> zeroinitializer, i8 %x4) + %res3 = fadd <4 x double> %res, %res1 + %res4 = fadd <4 x double> %res2, %res3 + ret <4 x double> %res4 +} + +declare <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64>, <2 x i64>, i32, <4 x i64>, i8) + +define <4 x i64>@test_int_x86_avx512_mask_inserti64x2_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_inserti64x2_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovb %edi, %k1 +; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} +; CHECK-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0 +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 +; CHECK-NEXT: vpaddq %ymm3, %ymm0, %ymm0 +; CHECK-NEXT: retq + 
%res = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 %x4) + %res1 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> %x3, i8 -1) + %res2 = call <4 x i64> @llvm.x86.avx512.mask.inserti64x2.256(<4 x i64> %x0, <2 x i64> %x1, i32 1, <4 x i64> zeroinitializer, i8 %x4) + %res3 = add <4 x i64> %res, %res1 + %res4 = add <4 x i64> %res3, %res2 + ret <4 x i64> %res4 +} Index: test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512vl-intrinsics.ll +++ test/CodeGen/X86/avx512vl-intrinsics.ll @@ -4744,3 +4744,45 @@ ret <4 x i64> %res2 } +declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8) + +define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_insertf32x4_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} +; CHECK-NEXT: vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm2, %ymm0 +; CHECK-NEXT: vaddps %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4) + %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1) + %res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4) + %res3 = fadd <8 x float> %res, %res1 + %res4 = fadd <8 x float> %res2, %res3 + ret <8 x float> %res4 +} + +declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8) + +define <8 x 
i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) { +; CHECK-LABEL: test_int_x86_avx512_mask_inserti32x4_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} +; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm3 {%k1} {z} +; CHECK-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 +; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 +; CHECK-NEXT: vpaddd %ymm0, %ymm3, %ymm0 +; CHECK-NEXT: retq + + %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4) + %res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1) + %res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4) + %res3 = add <8 x i32> %res, %res1 + %res4 = add <8 x i32> %res2, %res3 + ret <8 x i32> %res4 +} Index: test/CodeGen/X86/vector-lzcnt-256.ll =================================================================== --- test/CodeGen/X86/vector-lzcnt-256.ll +++ test/CodeGen/X86/vector-lzcnt-256.ll @@ -544,7 +544,7 @@ ; AVX512-NEXT: vpextrw $7, %xmm0, %eax ; AVX512-NEXT: lzcntw %ax, %ax ; AVX512-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0 -; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 0) ret <16 x i16> %out @@ -742,7 +742,7 @@ ; AVX512-NEXT: vpextrw $7, %xmm0, %eax ; AVX512-NEXT: lzcntw %ax, %ax ; AVX512-NEXT: vpinsrw $7, %eax, %xmm2, %xmm0 -; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %out = call <16 x i16> @llvm.ctlz.v16i16(<16 x i16> %in, i1 -1) ret <16 x i16> %out @@ -1214,7 +1214,7 @@ ; AVX512-NEXT: lzcntl %eax, %eax ; AVX512-NEXT: addl $-24, %eax ; AVX512-NEXT: vpinsrb $15, %eax, 
%xmm2, %xmm0 -; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 0) ret <32 x i8> %out @@ -1620,7 +1620,7 @@ ; AVX512-NEXT: lzcntl %eax, %eax ; AVX512-NEXT: addl $-24, %eax ; AVX512-NEXT: vpinsrb $15, %eax, %xmm2, %xmm0 -; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0 ; AVX512-NEXT: retq %out = call <32 x i8> @llvm.ctlz.v32i8(<32 x i8> %in, i1 -1) ret <32 x i8> %out Index: test/MC/X86/avx512-encodings.s =================================================================== --- test/MC/X86/avx512-encodings.s +++ test/MC/X86/avx512-encodings.s @@ -14958,6 +14958,166 @@ // CHECK: encoding: [0x62,0xf2,0xc5,0x08,0x43,0x92,0xf8,0xfb,0xff,0xff] vgetexpsd -1032(%rdx), %xmm7, %xmm2 +// CHECK: vinsertf32x4 $171, %xmm3, %zmm26, %zmm11 +// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0xdb,0xab] + vinsertf32x4 $0xab, %xmm3, %zmm26, %zmm11 + +// CHECK: vinsertf32x4 $171, %xmm3, %zmm26, %zmm11 {%k1} +// CHECK: encoding: [0x62,0x73,0x2d,0x41,0x18,0xdb,0xab] + vinsertf32x4 $0xab, %xmm3, %zmm26, %zmm11 {%k1} + +// CHECK: vinsertf32x4 $171, %xmm3, %zmm26, %zmm11 {%k1} {z} +// CHECK: encoding: [0x62,0x73,0x2d,0xc1,0x18,0xdb,0xab] + vinsertf32x4 $0xab, %xmm3, %zmm26, %zmm11 {%k1} {z} + +// CHECK: vinsertf32x4 $123, %xmm3, %zmm26, %zmm11 +// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0xdb,0x7b] + vinsertf32x4 $0x7b, %xmm3, %zmm26, %zmm11 + +// CHECK: vinsertf32x4 $123, (%rcx), %zmm26, %zmm11 +// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x19,0x7b] + vinsertf32x4 $0x7b, (%rcx), %zmm26, %zmm11 + +// CHECK: vinsertf32x4 $123, 291(%rax,%r14,8), %zmm26, %zmm11 +// CHECK: encoding: [0x62,0x33,0x2d,0x40,0x18,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vinsertf32x4 $0x7b, 291(%rax,%r14,8), %zmm26, %zmm11 + +// CHECK: vinsertf32x4 $123, 2032(%rdx), %zmm26, %zmm11 +// CHECK: encoding: 
[0x62,0x73,0x2d,0x40,0x18,0x5a,0x7f,0x7b] + vinsertf32x4 $0x7b, 2032(%rdx), %zmm26, %zmm11 + +// CHECK: vinsertf32x4 $123, 2048(%rdx), %zmm26, %zmm11 +// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x9a,0x00,0x08,0x00,0x00,0x7b] + vinsertf32x4 $0x7b, 2048(%rdx), %zmm26, %zmm11 + +// CHECK: vinsertf32x4 $123, -2048(%rdx), %zmm26, %zmm11 +// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x5a,0x80,0x7b] + vinsertf32x4 $0x7b, -2048(%rdx), %zmm26, %zmm11 + +// CHECK: vinsertf32x4 $123, -2064(%rdx), %zmm26, %zmm11 +// CHECK: encoding: [0x62,0x73,0x2d,0x40,0x18,0x9a,0xf0,0xf7,0xff,0xff,0x7b] + vinsertf32x4 $0x7b, -2064(%rdx), %zmm26, %zmm11 + +// CHECK: vinsertf64x4 $171, %ymm7, %zmm5, %zmm1 +// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0xcf,0xab] + vinsertf64x4 $0xab, %ymm7, %zmm5, %zmm1 + +// CHECK: vinsertf64x4 $171, %ymm7, %zmm5, %zmm1 {%k1} +// CHECK: encoding: [0x62,0xf3,0xd5,0x49,0x1a,0xcf,0xab] + vinsertf64x4 $0xab, %ymm7, %zmm5, %zmm1 {%k1} + +// CHECK: vinsertf64x4 $171, %ymm7, %zmm5, %zmm1 {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0xd5,0xc9,0x1a,0xcf,0xab] + vinsertf64x4 $0xab, %ymm7, %zmm5, %zmm1 {%k1} {z} + +// CHECK: vinsertf64x4 $123, %ymm7, %zmm5, %zmm1 +// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0xcf,0x7b] + vinsertf64x4 $0x7b, %ymm7, %zmm5, %zmm1 + +// CHECK: vinsertf64x4 $123, (%rcx), %zmm5, %zmm1 +// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x09,0x7b] + vinsertf64x4 $0x7b, (%rcx), %zmm5, %zmm1 + +// CHECK: vinsertf64x4 $123, 291(%rax,%r14,8), %zmm5, %zmm1 +// CHECK: encoding: [0x62,0xb3,0xd5,0x48,0x1a,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vinsertf64x4 $0x7b, 291(%rax,%r14,8), %zmm5, %zmm1 + +// CHECK: vinsertf64x4 $123, 4064(%rdx), %zmm5, %zmm1 +// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x4a,0x7f,0x7b] + vinsertf64x4 $0x7b, 4064(%rdx), %zmm5, %zmm1 + +// CHECK: vinsertf64x4 $123, 4096(%rdx), %zmm5, %zmm1 +// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x8a,0x00,0x10,0x00,0x00,0x7b] + vinsertf64x4 $0x7b, 4096(%rdx), %zmm5, %zmm1 + +// CHECK: 
vinsertf64x4 $123, -4096(%rdx), %zmm5, %zmm1 +// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x4a,0x80,0x7b] + vinsertf64x4 $0x7b, -4096(%rdx), %zmm5, %zmm1 + +// CHECK: vinsertf64x4 $123, -4128(%rdx), %zmm5, %zmm1 +// CHECK: encoding: [0x62,0xf3,0xd5,0x48,0x1a,0x8a,0xe0,0xef,0xff,0xff,0x7b] + vinsertf64x4 $0x7b, -4128(%rdx), %zmm5, %zmm1 + +// CHECK: vinserti32x4 $171, %xmm10, %zmm13, %zmm17 +// CHECK: encoding: [0x62,0xc3,0x15,0x48,0x38,0xca,0xab] + vinserti32x4 $0xab, %xmm10, %zmm13, %zmm17 + +// CHECK: vinserti32x4 $171, %xmm10, %zmm13, %zmm17 {%k6} +// CHECK: encoding: [0x62,0xc3,0x15,0x4e,0x38,0xca,0xab] + vinserti32x4 $0xab, %xmm10, %zmm13, %zmm17 {%k6} + +// CHECK: vinserti32x4 $171, %xmm10, %zmm13, %zmm17 {%k6} {z} +// CHECK: encoding: [0x62,0xc3,0x15,0xce,0x38,0xca,0xab] + vinserti32x4 $0xab, %xmm10, %zmm13, %zmm17 {%k6} {z} + +// CHECK: vinserti32x4 $123, %xmm10, %zmm13, %zmm17 +// CHECK: encoding: [0x62,0xc3,0x15,0x48,0x38,0xca,0x7b] + vinserti32x4 $0x7b, %xmm10, %zmm13, %zmm17 + +// CHECK: vinserti32x4 $123, (%rcx), %zmm13, %zmm17 +// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x09,0x7b] + vinserti32x4 $0x7b, (%rcx), %zmm13, %zmm17 + +// CHECK: vinserti32x4 $123, 291(%rax,%r14,8), %zmm13, %zmm17 +// CHECK: encoding: [0x62,0xa3,0x15,0x48,0x38,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vinserti32x4 $0x7b, 291(%rax,%r14,8), %zmm13, %zmm17 + +// CHECK: vinserti32x4 $123, 2032(%rdx), %zmm13, %zmm17 +// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x4a,0x7f,0x7b] + vinserti32x4 $0x7b, 2032(%rdx), %zmm13, %zmm17 + +// CHECK: vinserti32x4 $123, 2048(%rdx), %zmm13, %zmm17 +// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x8a,0x00,0x08,0x00,0x00,0x7b] + vinserti32x4 $0x7b, 2048(%rdx), %zmm13, %zmm17 + +// CHECK: vinserti32x4 $123, -2048(%rdx), %zmm13, %zmm17 +// CHECK: encoding: [0x62,0xe3,0x15,0x48,0x38,0x4a,0x80,0x7b] + vinserti32x4 $0x7b, -2048(%rdx), %zmm13, %zmm17 + +// CHECK: vinserti32x4 $123, -2064(%rdx), %zmm13, %zmm17 +// CHECK: encoding: 
[0x62,0xe3,0x15,0x48,0x38,0x8a,0xf0,0xf7,0xff,0xff,0x7b] + vinserti32x4 $0x7b, -2064(%rdx), %zmm13, %zmm17 + +// CHECK: vinserti64x4 $171, %ymm4, %zmm25, %zmm4 +// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0xe4,0xab] + vinserti64x4 $0xab, %ymm4, %zmm25, %zmm4 + +// CHECK: vinserti64x4 $171, %ymm4, %zmm25, %zmm4 {%k1} +// CHECK: encoding: [0x62,0xf3,0xb5,0x41,0x3a,0xe4,0xab] + vinserti64x4 $0xab, %ymm4, %zmm25, %zmm4 {%k1} + +// CHECK: vinserti64x4 $171, %ymm4, %zmm25, %zmm4 {%k1} {z} +// CHECK: encoding: [0x62,0xf3,0xb5,0xc1,0x3a,0xe4,0xab] + vinserti64x4 $0xab, %ymm4, %zmm25, %zmm4 {%k1} {z} + +// CHECK: vinserti64x4 $123, %ymm4, %zmm25, %zmm4 +// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0xe4,0x7b] + vinserti64x4 $0x7b, %ymm4, %zmm25, %zmm4 + +// CHECK: vinserti64x4 $123, (%rcx), %zmm25, %zmm4 +// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0x21,0x7b] + vinserti64x4 $0x7b, (%rcx), %zmm25, %zmm4 + +// CHECK: vinserti64x4 $123, 291(%rax,%r14,8), %zmm25, %zmm4 +// CHECK: encoding: [0x62,0xb3,0xb5,0x40,0x3a,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vinserti64x4 $0x7b, 291(%rax,%r14,8), %zmm25, %zmm4 + +// CHECK: vinserti64x4 $123, 4064(%rdx), %zmm25, %zmm4 +// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0x62,0x7f,0x7b] + vinserti64x4 $0x7b, 4064(%rdx), %zmm25, %zmm4 + +// CHECK: vinserti64x4 $123, 4096(%rdx), %zmm25, %zmm4 +// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0xa2,0x00,0x10,0x00,0x00,0x7b] + vinserti64x4 $0x7b, 4096(%rdx), %zmm25, %zmm4 + +// CHECK: vinserti64x4 $123, -4096(%rdx), %zmm25, %zmm4 +// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0x62,0x80,0x7b] + vinserti64x4 $0x7b, -4096(%rdx), %zmm25, %zmm4 + +// CHECK: vinserti64x4 $123, -4128(%rdx), %zmm25, %zmm4 +// CHECK: encoding: [0x62,0xf3,0xb5,0x40,0x3a,0xa2,0xe0,0xef,0xff,0xff,0x7b] + vinserti64x4 $0x7b, -4128(%rdx), %zmm25, %zmm4 + // CHECK: vextractf32x4 $171, %zmm21, %xmm15 // CHECK: encoding: [0x62,0xc3,0x7d,0x48,0x19,0xef,0xab] vextractf32x4 $0xab, %zmm21, %xmm15 Index: test/MC/X86/x86-64-avx512dq.s 
=================================================================== --- test/MC/X86/x86-64-avx512dq.s +++ test/MC/X86/x86-64-avx512dq.s @@ -2371,6 +2371,326 @@ // CHECK: encoding: [0x62,0xa1,0xff,0xca,0x7a,0xd5] vcvtuqq2ps %zmm21, %ymm18 {%k2} {z} +// CHECK: vinsertf32x8 $171, %ymm24, %zmm17, %zmm29 +// CHECK: encoding: [0x62,0x03,0x75,0x40,0x1a,0xe8,0xab] + vinsertf32x8 $0xab, %ymm24, %zmm17, %zmm29 + +// CHECK: vinsertf32x8 $171, %ymm24, %zmm17, %zmm29 {%k3} +// CHECK: encoding: [0x62,0x03,0x75,0x43,0x1a,0xe8,0xab] + vinsertf32x8 $0xab, %ymm24, %zmm17, %zmm29 {%k3} + +// CHECK: vinsertf32x8 $171, %ymm24, %zmm17, %zmm29 {%k3} {z} +// CHECK: encoding: [0x62,0x03,0x75,0xc3,0x1a,0xe8,0xab] + vinsertf32x8 $0xab, %ymm24, %zmm17, %zmm29 {%k3} {z} + +// CHECK: vinsertf32x8 $123, %ymm24, %zmm17, %zmm29 +// CHECK: encoding: [0x62,0x03,0x75,0x40,0x1a,0xe8,0x7b] + vinsertf32x8 $0x7b, %ymm24, %zmm17, %zmm29 + +// CHECK: vinsertf32x8 $123, (%rcx), %zmm17, %zmm29 +// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0x29,0x7b] + vinsertf32x8 $0x7b,(%rcx), %zmm17, %zmm29 + +// CHECK: vinsertf32x8 $123, 291(%rax,%r14,8), %zmm17, %zmm29 +// CHECK: encoding: [0x62,0x23,0x75,0x40,0x1a,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b] + vinsertf32x8 $0x7b,291(%rax,%r14,8), %zmm17, %zmm29 + +// CHECK: vinsertf32x8 $123, 4064(%rdx), %zmm17, %zmm29 +// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0x6a,0x7f,0x7b] + vinsertf32x8 $0x7b,4064(%rdx), %zmm17, %zmm29 + +// CHECK: vinsertf32x8 $123, 4096(%rdx), %zmm17, %zmm29 +// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0xaa,0x00,0x10,0x00,0x00,0x7b] + vinsertf32x8 $0x7b,4096(%rdx), %zmm17, %zmm29 + +// CHECK: vinsertf32x8 $123, -4096(%rdx), %zmm17, %zmm29 +// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0x6a,0x80,0x7b] + vinsertf32x8 $0x7b,-4096(%rdx), %zmm17, %zmm29 + +// CHECK: vinsertf32x8 $123, -4128(%rdx), %zmm17, %zmm29 +// CHECK: encoding: [0x62,0x63,0x75,0x40,0x1a,0xaa,0xe0,0xef,0xff,0xff,0x7b] + vinsertf32x8 $0x7b,-4128(%rdx), %zmm17, %zmm29 + +// CHECK: 
vinsertf32x8 $171, %ymm22, %zmm28, %zmm29 +// CHECK: encoding: [0x62,0x23,0x1d,0x40,0x1a,0xee,0xab] + vinsertf32x8 $0xab, %ymm22, %zmm28, %zmm29 + +// CHECK: vinsertf32x8 $171, %ymm22, %zmm28, %zmm29 {%k5} +// CHECK: encoding: [0x62,0x23,0x1d,0x45,0x1a,0xee,0xab] + vinsertf32x8 $0xab, %ymm22, %zmm28, %zmm29 {%k5} + +// CHECK: vinsertf32x8 $171, %ymm22, %zmm28, %zmm29 {%k5} {z} +// CHECK: encoding: [0x62,0x23,0x1d,0xc5,0x1a,0xee,0xab] + vinsertf32x8 $0xab, %ymm22, %zmm28, %zmm29 {%k5} {z} + +// CHECK: vinsertf32x8 $123, %ymm22, %zmm28, %zmm29 +// CHECK: encoding: [0x62,0x23,0x1d,0x40,0x1a,0xee,0x7b] + vinsertf32x8 $0x7b, %ymm22, %zmm28, %zmm29 + +// CHECK: vinsertf32x8 $123, (%rcx), %zmm28, %zmm29 +// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0x29,0x7b] + vinsertf32x8 $0x7b,(%rcx), %zmm28, %zmm29 + +// CHECK: vinsertf32x8 $123, 4660(%rax,%r14,8), %zmm28, %zmm29 +// CHECK: encoding: [0x62,0x23,0x1d,0x40,0x1a,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b] + vinsertf32x8 $0x7b,4660(%rax,%r14,8), %zmm28, %zmm29 + +// CHECK: vinsertf32x8 $123, 4064(%rdx), %zmm28, %zmm29 +// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0x6a,0x7f,0x7b] + vinsertf32x8 $0x7b,4064(%rdx), %zmm28, %zmm29 + +// CHECK: vinsertf32x8 $123, 4096(%rdx), %zmm28, %zmm29 +// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0xaa,0x00,0x10,0x00,0x00,0x7b] + vinsertf32x8 $0x7b,4096(%rdx), %zmm28, %zmm29 + +// CHECK: vinsertf32x8 $123, -4096(%rdx), %zmm28, %zmm29 +// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0x6a,0x80,0x7b] + vinsertf32x8 $0x7b,-4096(%rdx), %zmm28, %zmm29 + +// CHECK: vinsertf32x8 $123, -4128(%rdx), %zmm28, %zmm29 +// CHECK: encoding: [0x62,0x63,0x1d,0x40,0x1a,0xaa,0xe0,0xef,0xff,0xff,0x7b] + vinsertf32x8 $0x7b,-4128(%rdx), %zmm28, %zmm29 + +// CHECK: vinsertf64x2 $171, %xmm25, %zmm28, %zmm17 +// CHECK: encoding: [0x62,0x83,0x9d,0x40,0x18,0xc9,0xab] + vinsertf64x2 $0xab, %xmm25, %zmm28, %zmm17 + +// CHECK: vinsertf64x2 $171, %xmm25, %zmm28, %zmm17 {%k2} +// CHECK: encoding: 
[0x62,0x83,0x9d,0x42,0x18,0xc9,0xab] + vinsertf64x2 $0xab, %xmm25, %zmm28, %zmm17 {%k2} + +// CHECK: vinsertf64x2 $171, %xmm25, %zmm28, %zmm17 {%k2} {z} +// CHECK: encoding: [0x62,0x83,0x9d,0xc2,0x18,0xc9,0xab] + vinsertf64x2 $0xab, %xmm25, %zmm28, %zmm17 {%k2} {z} + +// CHECK: vinsertf64x2 $123, %xmm25, %zmm28, %zmm17 +// CHECK: encoding: [0x62,0x83,0x9d,0x40,0x18,0xc9,0x7b] + vinsertf64x2 $0x7b, %xmm25, %zmm28, %zmm17 + +// CHECK: vinsertf64x2 $123, (%rcx), %zmm28, %zmm17 +// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x09,0x7b] + vinsertf64x2 $0x7b,(%rcx), %zmm28, %zmm17 + +// CHECK: vinsertf64x2 $123, 291(%rax,%r14,8), %zmm28, %zmm17 +// CHECK: encoding: [0x62,0xa3,0x9d,0x40,0x18,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vinsertf64x2 $0x7b,291(%rax,%r14,8), %zmm28, %zmm17 + +// CHECK: vinsertf64x2 $123, 2032(%rdx), %zmm28, %zmm17 +// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x4a,0x7f,0x7b] + vinsertf64x2 $0x7b,2032(%rdx), %zmm28, %zmm17 + +// CHECK: vinsertf64x2 $123, 2048(%rdx), %zmm28, %zmm17 +// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x8a,0x00,0x08,0x00,0x00,0x7b] + vinsertf64x2 $0x7b,2048(%rdx), %zmm28, %zmm17 + +// CHECK: vinsertf64x2 $123, -2048(%rdx), %zmm28, %zmm17 +// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x4a,0x80,0x7b] + vinsertf64x2 $0x7b,-2048(%rdx), %zmm28, %zmm17 + +// CHECK: vinsertf64x2 $123, -2064(%rdx), %zmm28, %zmm17 +// CHECK: encoding: [0x62,0xe3,0x9d,0x40,0x18,0x8a,0xf0,0xf7,0xff,0xff,0x7b] + vinsertf64x2 $0x7b,-2064(%rdx), %zmm28, %zmm17 + +// CHECK: vinsertf64x2 $171, %xmm28, %zmm17, %zmm20 +// CHECK: encoding: [0x62,0x83,0xf5,0x40,0x18,0xe4,0xab] + vinsertf64x2 $0xab, %xmm28, %zmm17, %zmm20 + +// CHECK: vinsertf64x2 $171, %xmm28, %zmm17, %zmm20 {%k7} +// CHECK: encoding: [0x62,0x83,0xf5,0x47,0x18,0xe4,0xab] + vinsertf64x2 $0xab, %xmm28, %zmm17, %zmm20 {%k7} + +// CHECK: vinsertf64x2 $171, %xmm28, %zmm17, %zmm20 {%k7} {z} +// CHECK: encoding: [0x62,0x83,0xf5,0xc7,0x18,0xe4,0xab] + vinsertf64x2 $0xab, %xmm28, %zmm17, %zmm20 {%k7} 
{z} + +// CHECK: vinsertf64x2 $123, %xmm28, %zmm17, %zmm20 +// CHECK: encoding: [0x62,0x83,0xf5,0x40,0x18,0xe4,0x7b] + vinsertf64x2 $0x7b, %xmm28, %zmm17, %zmm20 + +// CHECK: vinsertf64x2 $123, (%rcx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0x21,0x7b] + vinsertf64x2 $0x7b,(%rcx), %zmm17, %zmm20 + +// CHECK: vinsertf64x2 $123, 4660(%rax,%r14,8), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xa3,0xf5,0x40,0x18,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b] + vinsertf64x2 $0x7b,4660(%rax,%r14,8), %zmm17, %zmm20 + +// CHECK: vinsertf64x2 $123, 2032(%rdx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0x62,0x7f,0x7b] + vinsertf64x2 $0x7b,2032(%rdx), %zmm17, %zmm20 + +// CHECK: vinsertf64x2 $123, 2048(%rdx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0xa2,0x00,0x08,0x00,0x00,0x7b] + vinsertf64x2 $0x7b,2048(%rdx), %zmm17, %zmm20 + +// CHECK: vinsertf64x2 $123, -2048(%rdx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0x62,0x80,0x7b] + vinsertf64x2 $0x7b,-2048(%rdx), %zmm17, %zmm20 + +// CHECK: vinsertf64x2 $123, -2064(%rdx), %zmm17, %zmm20 +// CHECK: encoding: [0x62,0xe3,0xf5,0x40,0x18,0xa2,0xf0,0xf7,0xff,0xff,0x7b] + vinsertf64x2 $0x7b,-2064(%rdx), %zmm17, %zmm20 + +// CHECK: vinserti32x8 $171, %ymm22, %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x23,0x3d,0x40,0x3a,0xe6,0xab] + vinserti32x8 $0xab, %ymm22, %zmm24, %zmm28 + +// CHECK: vinserti32x8 $171, %ymm22, %zmm24, %zmm28 {%k2} +// CHECK: encoding: [0x62,0x23,0x3d,0x42,0x3a,0xe6,0xab] + vinserti32x8 $0xab, %ymm22, %zmm24, %zmm28 {%k2} + +// CHECK: vinserti32x8 $171, %ymm22, %zmm24, %zmm28 {%k2} {z} +// CHECK: encoding: [0x62,0x23,0x3d,0xc2,0x3a,0xe6,0xab] + vinserti32x8 $0xab, %ymm22, %zmm24, %zmm28 {%k2} {z} + +// CHECK: vinserti32x8 $123, %ymm22, %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x23,0x3d,0x40,0x3a,0xe6,0x7b] + vinserti32x8 $0x7b, %ymm22, %zmm24, %zmm28 + +// CHECK: vinserti32x8 $123, (%rcx), %zmm24, %zmm28 +// CHECK: encoding: 
[0x62,0x63,0x3d,0x40,0x3a,0x21,0x7b] + vinserti32x8 $0x7b,(%rcx), %zmm24, %zmm28 + +// CHECK: vinserti32x8 $123, 291(%rax,%r14,8), %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x23,0x3d,0x40,0x3a,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vinserti32x8 $0x7b,291(%rax,%r14,8), %zmm24, %zmm28 + +// CHECK: vinserti32x8 $123, 4064(%rdx), %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0x62,0x7f,0x7b] + vinserti32x8 $0x7b,4064(%rdx), %zmm24, %zmm28 + +// CHECK: vinserti32x8 $123, 4096(%rdx), %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0xa2,0x00,0x10,0x00,0x00,0x7b] + vinserti32x8 $0x7b,4096(%rdx), %zmm24, %zmm28 + +// CHECK: vinserti32x8 $123, -4096(%rdx), %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0x62,0x80,0x7b] + vinserti32x8 $0x7b,-4096(%rdx), %zmm24, %zmm28 + +// CHECK: vinserti32x8 $123, -4128(%rdx), %zmm24, %zmm28 +// CHECK: encoding: [0x62,0x63,0x3d,0x40,0x3a,0xa2,0xe0,0xef,0xff,0xff,0x7b] + vinserti32x8 $0x7b,-4128(%rdx), %zmm24, %zmm28 + +// CHECK: vinserti32x8 $171, %ymm24, %zmm20, %zmm28 +// CHECK: encoding: [0x62,0x03,0x5d,0x40,0x3a,0xe0,0xab] + vinserti32x8 $0xab, %ymm24, %zmm20, %zmm28 + +// CHECK: vinserti32x8 $171, %ymm24, %zmm20, %zmm28 {%k7} +// CHECK: encoding: [0x62,0x03,0x5d,0x47,0x3a,0xe0,0xab] + vinserti32x8 $0xab, %ymm24, %zmm20, %zmm28 {%k7} + +// CHECK: vinserti32x8 $171, %ymm24, %zmm20, %zmm28 {%k7} {z} +// CHECK: encoding: [0x62,0x03,0x5d,0xc7,0x3a,0xe0,0xab] + vinserti32x8 $0xab, %ymm24, %zmm20, %zmm28 {%k7} {z} + +// CHECK: vinserti32x8 $123, %ymm24, %zmm20, %zmm28 +// CHECK: encoding: [0x62,0x03,0x5d,0x40,0x3a,0xe0,0x7b] + vinserti32x8 $0x7b, %ymm24, %zmm20, %zmm28 + +// CHECK: vinserti32x8 $123, (%rcx), %zmm20, %zmm28 +// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0x21,0x7b] + vinserti32x8 $0x7b,(%rcx), %zmm20, %zmm28 + +// CHECK: vinserti32x8 $123, 4660(%rax,%r14,8), %zmm20, %zmm28 +// CHECK: encoding: [0x62,0x23,0x5d,0x40,0x3a,0xa4,0xf0,0x34,0x12,0x00,0x00,0x7b] + vinserti32x8 
$0x7b,4660(%rax,%r14,8), %zmm20, %zmm28 + +// CHECK: vinserti32x8 $123, 4064(%rdx), %zmm20, %zmm28 +// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0x62,0x7f,0x7b] + vinserti32x8 $0x7b,4064(%rdx), %zmm20, %zmm28 + +// CHECK: vinserti32x8 $123, 4096(%rdx), %zmm20, %zmm28 +// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0xa2,0x00,0x10,0x00,0x00,0x7b] + vinserti32x8 $0x7b,4096(%rdx), %zmm20, %zmm28 + +// CHECK: vinserti32x8 $123, -4096(%rdx), %zmm20, %zmm28 +// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0x62,0x80,0x7b] + vinserti32x8 $0x7b,-4096(%rdx), %zmm20, %zmm28 + +// CHECK: vinserti32x8 $123, -4128(%rdx), %zmm20, %zmm28 +// CHECK: encoding: [0x62,0x63,0x5d,0x40,0x3a,0xa2,0xe0,0xef,0xff,0xff,0x7b] + vinserti32x8 $0x7b,-4128(%rdx), %zmm20, %zmm28 + +// CHECK: vinserti64x2 $171, %xmm26, %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x03,0xed,0x40,0x38,0xe2,0xab] + vinserti64x2 $0xab, %xmm26, %zmm18, %zmm28 + +// CHECK: vinserti64x2 $171, %xmm26, %zmm18, %zmm28 {%k7} +// CHECK: encoding: [0x62,0x03,0xed,0x47,0x38,0xe2,0xab] + vinserti64x2 $0xab, %xmm26, %zmm18, %zmm28 {%k7} + +// CHECK: vinserti64x2 $171, %xmm26, %zmm18, %zmm28 {%k7} {z} +// CHECK: encoding: [0x62,0x03,0xed,0xc7,0x38,0xe2,0xab] + vinserti64x2 $0xab, %xmm26, %zmm18, %zmm28 {%k7} {z} + +// CHECK: vinserti64x2 $123, %xmm26, %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x03,0xed,0x40,0x38,0xe2,0x7b] + vinserti64x2 $0x7b, %xmm26, %zmm18, %zmm28 + +// CHECK: vinserti64x2 $123, (%rcx), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x21,0x7b] + vinserti64x2 $0x7b,(%rcx), %zmm18, %zmm28 + +// CHECK: vinserti64x2 $123, 291(%rax,%r14,8), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x23,0xed,0x40,0x38,0xa4,0xf0,0x23,0x01,0x00,0x00,0x7b] + vinserti64x2 $0x7b,291(%rax,%r14,8), %zmm18, %zmm28 + +// CHECK: vinserti64x2 $123, 2032(%rdx), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x62,0x7f,0x7b] + vinserti64x2 $0x7b,2032(%rdx), %zmm18, %zmm28 + +// CHECK: vinserti64x2 $123, 2048(%rdx), %zmm18, 
%zmm28 +// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0xa2,0x00,0x08,0x00,0x00,0x7b] + vinserti64x2 $0x7b,2048(%rdx), %zmm18, %zmm28 + +// CHECK: vinserti64x2 $123, -2048(%rdx), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x62,0x80,0x7b] + vinserti64x2 $0x7b,-2048(%rdx), %zmm18, %zmm28 + +// CHECK: vinserti64x2 $123, -2064(%rdx), %zmm18, %zmm28 +// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0xa2,0xf0,0xf7,0xff,0xff,0x7b] + vinserti64x2 $0x7b,-2064(%rdx), %zmm18, %zmm28 + +// CHECK: vinserti64x2 $171, %xmm21, %zmm18, %zmm27 +// CHECK: encoding: [0x62,0x23,0xed,0x40,0x38,0xdd,0xab] + vinserti64x2 $0xab, %xmm21, %zmm18, %zmm27 + +// CHECK: vinserti64x2 $171, %xmm21, %zmm18, %zmm27 {%k2} +// CHECK: encoding: [0x62,0x23,0xed,0x42,0x38,0xdd,0xab] + vinserti64x2 $0xab, %xmm21, %zmm18, %zmm27 {%k2} + +// CHECK: vinserti64x2 $171, %xmm21, %zmm18, %zmm27 {%k2} {z} +// CHECK: encoding: [0x62,0x23,0xed,0xc2,0x38,0xdd,0xab] + vinserti64x2 $0xab, %xmm21, %zmm18, %zmm27 {%k2} {z} + +// CHECK: vinserti64x2 $123, %xmm21, %zmm18, %zmm27 +// CHECK: encoding: [0x62,0x23,0xed,0x40,0x38,0xdd,0x7b] + vinserti64x2 $0x7b, %xmm21, %zmm18, %zmm27 + +// CHECK: vinserti64x2 $123, (%rcx), %zmm18, %zmm27 +// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x19,0x7b] + vinserti64x2 $0x7b,(%rcx), %zmm18, %zmm27 + +// CHECK: vinserti64x2 $123, 4660(%rax,%r14,8), %zmm18, %zmm27 +// CHECK: encoding: [0x62,0x23,0xed,0x40,0x38,0x9c,0xf0,0x34,0x12,0x00,0x00,0x7b] + vinserti64x2 $0x7b,4660(%rax,%r14,8), %zmm18, %zmm27 + +// CHECK: vinserti64x2 $123, 2032(%rdx), %zmm18, %zmm27 +// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x5a,0x7f,0x7b] + vinserti64x2 $0x7b,2032(%rdx), %zmm18, %zmm27 + +// CHECK: vinserti64x2 $123, 2048(%rdx), %zmm18, %zmm27 +// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x9a,0x00,0x08,0x00,0x00,0x7b] + vinserti64x2 $0x7b,2048(%rdx), %zmm18, %zmm27 + +// CHECK: vinserti64x2 $123, -2048(%rdx), %zmm18, %zmm27 +// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x5a,0x80,0x7b] + 
vinserti64x2 $0x7b,-2048(%rdx), %zmm18, %zmm27 + +// CHECK: vinserti64x2 $123, -2064(%rdx), %zmm18, %zmm27 +// CHECK: encoding: [0x62,0x63,0xed,0x40,0x38,0x9a,0xf0,0xf7,0xff,0xff,0x7b] + vinserti64x2 $0x7b,-2064(%rdx), %zmm18, %zmm27 + // CHECK: vextractf32x8 $171, %zmm18, %ymm21 // CHECK: encoding: [0x62,0xa3,0x7d,0x48,0x1b,0xd5,0xab] vextractf32x8 $0xab, %zmm18, %ymm21 Index: test/MC/X86/x86-64-avx512dq_vl.s =================================================================== --- test/MC/X86/x86-64-avx512dq_vl.s +++ test/MC/X86/x86-64-avx512dq_vl.s @@ -3584,6 +3584,166 @@ // CHECK: encoding: [0x62,0x61,0xff,0x38,0x7a,0xa2,0xf8,0xfb,0xff,0xff] vcvtuqq2ps -1032(%rdx){1to4}, %xmm28 +// CHECK: vinsertf64x2 $171, %xmm23, %ymm27, %ymm21 +// CHECK: encoding: [0x62,0xa3,0xa5,0x20,0x18,0xef,0xab] + vinsertf64x2 $0xab, %xmm23, %ymm27, %ymm21 + +// CHECK: vinsertf64x2 $171, %xmm23, %ymm27, %ymm21 {%k7} +// CHECK: encoding: [0x62,0xa3,0xa5,0x27,0x18,0xef,0xab] + vinsertf64x2 $0xab, %xmm23, %ymm27, %ymm21 {%k7} + +// CHECK: vinsertf64x2 $171, %xmm23, %ymm27, %ymm21 {%k7} {z} +// CHECK: encoding: [0x62,0xa3,0xa5,0xa7,0x18,0xef,0xab] + vinsertf64x2 $0xab, %xmm23, %ymm27, %ymm21 {%k7} {z} + +// CHECK: vinsertf64x2 $123, %xmm23, %ymm27, %ymm21 +// CHECK: encoding: [0x62,0xa3,0xa5,0x20,0x18,0xef,0x7b] + vinsertf64x2 $0x7b, %xmm23, %ymm27, %ymm21 + +// CHECK: vinsertf64x2 $123, (%rcx), %ymm27, %ymm21 +// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0x29,0x7b] + vinsertf64x2 $0x7b,(%rcx), %ymm27, %ymm21 + +// CHECK: vinsertf64x2 $123, 291(%rax,%r14,8), %ymm27, %ymm21 +// CHECK: encoding: [0x62,0xa3,0xa5,0x20,0x18,0xac,0xf0,0x23,0x01,0x00,0x00,0x7b] + vinsertf64x2 $0x7b,291(%rax,%r14,8), %ymm27, %ymm21 + +// CHECK: vinsertf64x2 $123, 2032(%rdx), %ymm27, %ymm21 +// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0x6a,0x7f,0x7b] + vinsertf64x2 $0x7b,2032(%rdx), %ymm27, %ymm21 + +// CHECK: vinsertf64x2 $123, 2048(%rdx), %ymm27, %ymm21 +// CHECK: encoding: 
[0x62,0xe3,0xa5,0x20,0x18,0xaa,0x00,0x08,0x00,0x00,0x7b] + vinsertf64x2 $0x7b,2048(%rdx), %ymm27, %ymm21 + +// CHECK: vinsertf64x2 $123, -2048(%rdx), %ymm27, %ymm21 +// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0x6a,0x80,0x7b] + vinsertf64x2 $0x7b,-2048(%rdx), %ymm27, %ymm21 + +// CHECK: vinsertf64x2 $123, -2064(%rdx), %ymm27, %ymm21 +// CHECK: encoding: [0x62,0xe3,0xa5,0x20,0x18,0xaa,0xf0,0xf7,0xff,0xff,0x7b] + vinsertf64x2 $0x7b,-2064(%rdx), %ymm27, %ymm21 + +// CHECK: vinsertf64x2 $171, %xmm27, %ymm23, %ymm24 +// CHECK: encoding: [0x62,0x03,0xc5,0x20,0x18,0xc3,0xab] + vinsertf64x2 $0xab, %xmm27, %ymm23, %ymm24 + +// CHECK: vinsertf64x2 $171, %xmm27, %ymm23, %ymm24 {%k5} +// CHECK: encoding: [0x62,0x03,0xc5,0x25,0x18,0xc3,0xab] + vinsertf64x2 $0xab, %xmm27, %ymm23, %ymm24 {%k5} + +// CHECK: vinsertf64x2 $171, %xmm27, %ymm23, %ymm24 {%k5} {z} +// CHECK: encoding: [0x62,0x03,0xc5,0xa5,0x18,0xc3,0xab] + vinsertf64x2 $0xab, %xmm27, %ymm23, %ymm24 {%k5} {z} + +// CHECK: vinsertf64x2 $123, %xmm27, %ymm23, %ymm24 +// CHECK: encoding: [0x62,0x03,0xc5,0x20,0x18,0xc3,0x7b] + vinsertf64x2 $0x7b, %xmm27, %ymm23, %ymm24 + +// CHECK: vinsertf64x2 $123, (%rcx), %ymm23, %ymm24 +// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x01,0x7b] + vinsertf64x2 $0x7b,(%rcx), %ymm23, %ymm24 + +// CHECK: vinsertf64x2 $123, 4660(%rax,%r14,8), %ymm23, %ymm24 +// CHECK: encoding: [0x62,0x23,0xc5,0x20,0x18,0x84,0xf0,0x34,0x12,0x00,0x00,0x7b] + vinsertf64x2 $0x7b,4660(%rax,%r14,8), %ymm23, %ymm24 + +// CHECK: vinsertf64x2 $123, 2032(%rdx), %ymm23, %ymm24 +// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x42,0x7f,0x7b] + vinsertf64x2 $0x7b,2032(%rdx), %ymm23, %ymm24 + +// CHECK: vinsertf64x2 $123, 2048(%rdx), %ymm23, %ymm24 +// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x82,0x00,0x08,0x00,0x00,0x7b] + vinsertf64x2 $0x7b,2048(%rdx), %ymm23, %ymm24 + +// CHECK: vinsertf64x2 $123, -2048(%rdx), %ymm23, %ymm24 +// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x42,0x80,0x7b] + vinsertf64x2 
$0x7b,-2048(%rdx), %ymm23, %ymm24 + +// CHECK: vinsertf64x2 $123, -2064(%rdx), %ymm23, %ymm24 +// CHECK: encoding: [0x62,0x63,0xc5,0x20,0x18,0x82,0xf0,0xf7,0xff,0xff,0x7b] + vinsertf64x2 $0x7b,-2064(%rdx), %ymm23, %ymm24 + +// CHECK: vinserti64x2 $171, %xmm21, %ymm25, %ymm19 +// CHECK: encoding: [0x62,0xa3,0xb5,0x20,0x38,0xdd,0xab] + vinserti64x2 $0xab, %xmm21, %ymm25, %ymm19 + +// CHECK: vinserti64x2 $171, %xmm21, %ymm25, %ymm19 {%k6} +// CHECK: encoding: [0x62,0xa3,0xb5,0x26,0x38,0xdd,0xab] + vinserti64x2 $0xab, %xmm21, %ymm25, %ymm19 {%k6} + +// CHECK: vinserti64x2 $171, %xmm21, %ymm25, %ymm19 {%k6} {z} +// CHECK: encoding: [0x62,0xa3,0xb5,0xa6,0x38,0xdd,0xab] + vinserti64x2 $0xab, %xmm21, %ymm25, %ymm19 {%k6} {z} + +// CHECK: vinserti64x2 $123, %xmm21, %ymm25, %ymm19 +// CHECK: encoding: [0x62,0xa3,0xb5,0x20,0x38,0xdd,0x7b] + vinserti64x2 $0x7b, %xmm21, %ymm25, %ymm19 + +// CHECK: vinserti64x2 $123, (%rcx), %ymm25, %ymm19 +// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x19,0x7b] + vinserti64x2 $0x7b,(%rcx), %ymm25, %ymm19 + +// CHECK: vinserti64x2 $123, 291(%rax,%r14,8), %ymm25, %ymm19 +// CHECK: encoding: [0x62,0xa3,0xb5,0x20,0x38,0x9c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vinserti64x2 $0x7b,291(%rax,%r14,8), %ymm25, %ymm19 + +// CHECK: vinserti64x2 $123, 2032(%rdx), %ymm25, %ymm19 +// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x5a,0x7f,0x7b] + vinserti64x2 $0x7b,2032(%rdx), %ymm25, %ymm19 + +// CHECK: vinserti64x2 $123, 2048(%rdx), %ymm25, %ymm19 +// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x9a,0x00,0x08,0x00,0x00,0x7b] + vinserti64x2 $0x7b,2048(%rdx), %ymm25, %ymm19 + +// CHECK: vinserti64x2 $123, -2048(%rdx), %ymm25, %ymm19 +// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x5a,0x80,0x7b] + vinserti64x2 $0x7b,-2048(%rdx), %ymm25, %ymm19 + +// CHECK: vinserti64x2 $123, -2064(%rdx), %ymm25, %ymm19 +// CHECK: encoding: [0x62,0xe3,0xb5,0x20,0x38,0x9a,0xf0,0xf7,0xff,0xff,0x7b] + vinserti64x2 $0x7b,-2064(%rdx), %ymm25, %ymm19 + +// CHECK: vinserti64x2 $171, %xmm25, 
%ymm24, %ymm29 +// CHECK: encoding: [0x62,0x03,0xbd,0x20,0x38,0xe9,0xab] + vinserti64x2 $0xab, %xmm25, %ymm24, %ymm29 + +// CHECK: vinserti64x2 $171, %xmm25, %ymm24, %ymm29 {%k2} +// CHECK: encoding: [0x62,0x03,0xbd,0x22,0x38,0xe9,0xab] + vinserti64x2 $0xab, %xmm25, %ymm24, %ymm29 {%k2} + +// CHECK: vinserti64x2 $171, %xmm25, %ymm24, %ymm29 {%k2} {z} +// CHECK: encoding: [0x62,0x03,0xbd,0xa2,0x38,0xe9,0xab] + vinserti64x2 $0xab, %xmm25, %ymm24, %ymm29 {%k2} {z} + +// CHECK: vinserti64x2 $123, %xmm25, %ymm24, %ymm29 +// CHECK: encoding: [0x62,0x03,0xbd,0x20,0x38,0xe9,0x7b] + vinserti64x2 $0x7b, %xmm25, %ymm24, %ymm29 + +// CHECK: vinserti64x2 $123, (%rcx), %ymm24, %ymm29 +// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0x29,0x7b] + vinserti64x2 $0x7b,(%rcx), %ymm24, %ymm29 + +// CHECK: vinserti64x2 $123, 4660(%rax,%r14,8), %ymm24, %ymm29 +// CHECK: encoding: [0x62,0x23,0xbd,0x20,0x38,0xac,0xf0,0x34,0x12,0x00,0x00,0x7b] + vinserti64x2 $0x7b,4660(%rax,%r14,8), %ymm24, %ymm29 + +// CHECK: vinserti64x2 $123, 2032(%rdx), %ymm24, %ymm29 +// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0x6a,0x7f,0x7b] + vinserti64x2 $0x7b,2032(%rdx), %ymm24, %ymm29 + +// CHECK: vinserti64x2 $123, 2048(%rdx), %ymm24, %ymm29 +// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0xaa,0x00,0x08,0x00,0x00,0x7b] + vinserti64x2 $0x7b,2048(%rdx), %ymm24, %ymm29 + +// CHECK: vinserti64x2 $123, -2048(%rdx), %ymm24, %ymm29 +// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0x6a,0x80,0x7b] + vinserti64x2 $0x7b,-2048(%rdx), %ymm24, %ymm29 + +// CHECK: vinserti64x2 $123, -2064(%rdx), %ymm24, %ymm29 +// CHECK: encoding: [0x62,0x63,0xbd,0x20,0x38,0xaa,0xf0,0xf7,0xff,0xff,0x7b] + vinserti64x2 $0x7b,-2064(%rdx), %ymm24, %ymm29 + // CHECK: vextractf64x2 $171, %ymm21, %xmm27 // CHECK: encoding: [0x62,0x83,0xfd,0x28,0x19,0xeb,0xab] vextractf64x2 $0xab, %ymm21, %xmm27 Index: test/MC/X86/x86-64-avx512f_vl.s =================================================================== --- test/MC/X86/x86-64-avx512f_vl.s +++ 
test/MC/X86/x86-64-avx512f_vl.s @@ -19739,6 +19739,86 @@ // CHECK: encoding: [0x62,0xe1,0xe5,0x30,0x6d,0xa2,0xf8,0xfb,0xff,0xff] vpunpckhqdq -1032(%rdx){1to4}, %ymm19, %ymm20 +// CHECK: vinsertf32x4 $171, %xmm27, %ymm18, %ymm18 +// CHECK: encoding: [0x62,0x83,0x6d,0x20,0x18,0xd3,0xab] + vinsertf32x4 $0xab, %xmm27, %ymm18, %ymm18 + +// CHECK: vinsertf32x4 $171, %xmm27, %ymm18, %ymm18 {%k7} +// CHECK: encoding: [0x62,0x83,0x6d,0x27,0x18,0xd3,0xab] + vinsertf32x4 $0xab, %xmm27, %ymm18, %ymm18 {%k7} + +// CHECK: vinsertf32x4 $171, %xmm27, %ymm18, %ymm18 {%k7} {z} +// CHECK: encoding: [0x62,0x83,0x6d,0xa7,0x18,0xd3,0xab] + vinsertf32x4 $0xab, %xmm27, %ymm18, %ymm18 {%k7} {z} + +// CHECK: vinsertf32x4 $123, %xmm27, %ymm18, %ymm18 +// CHECK: encoding: [0x62,0x83,0x6d,0x20,0x18,0xd3,0x7b] + vinsertf32x4 $0x7b, %xmm27, %ymm18, %ymm18 + +// CHECK: vinsertf32x4 $123, (%rcx), %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x11,0x7b] + vinsertf32x4 $0x7b, (%rcx), %ymm18, %ymm18 + +// CHECK: vinsertf32x4 $123, 291(%rax,%r14,8), %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xa3,0x6d,0x20,0x18,0x94,0xf0,0x23,0x01,0x00,0x00,0x7b] + vinsertf32x4 $0x7b, 291(%rax,%r14,8), %ymm18, %ymm18 + +// CHECK: vinsertf32x4 $123, 2032(%rdx), %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x52,0x7f,0x7b] + vinsertf32x4 $0x7b, 2032(%rdx), %ymm18, %ymm18 + +// CHECK: vinsertf32x4 $123, 2048(%rdx), %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x92,0x00,0x08,0x00,0x00,0x7b] + vinsertf32x4 $0x7b, 2048(%rdx), %ymm18, %ymm18 + +// CHECK: vinsertf32x4 $123, -2048(%rdx), %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x52,0x80,0x7b] + vinsertf32x4 $0x7b, -2048(%rdx), %ymm18, %ymm18 + +// CHECK: vinsertf32x4 $123, -2064(%rdx), %ymm18, %ymm18 +// CHECK: encoding: [0x62,0xe3,0x6d,0x20,0x18,0x92,0xf0,0xf7,0xff,0xff,0x7b] + vinsertf32x4 $0x7b, -2064(%rdx), %ymm18, %ymm18 + +// CHECK: vinserti32x4 $171, %xmm24, %ymm28, %ymm17 +// CHECK: encoding: 
[0x62,0x83,0x1d,0x20,0x38,0xc8,0xab] + vinserti32x4 $0xab, %xmm24, %ymm28, %ymm17 + +// CHECK: vinserti32x4 $171, %xmm24, %ymm28, %ymm17 {%k3} +// CHECK: encoding: [0x62,0x83,0x1d,0x23,0x38,0xc8,0xab] + vinserti32x4 $0xab, %xmm24, %ymm28, %ymm17 {%k3} + +// CHECK: vinserti32x4 $171, %xmm24, %ymm28, %ymm17 {%k3} {z} +// CHECK: encoding: [0x62,0x83,0x1d,0xa3,0x38,0xc8,0xab] + vinserti32x4 $0xab, %xmm24, %ymm28, %ymm17 {%k3} {z} + +// CHECK: vinserti32x4 $123, %xmm24, %ymm28, %ymm17 +// CHECK: encoding: [0x62,0x83,0x1d,0x20,0x38,0xc8,0x7b] + vinserti32x4 $0x7b, %xmm24, %ymm28, %ymm17 + +// CHECK: vinserti32x4 $123, (%rcx), %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x09,0x7b] + vinserti32x4 $0x7b, (%rcx), %ymm28, %ymm17 + +// CHECK: vinserti32x4 $123, 291(%rax,%r14,8), %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xa3,0x1d,0x20,0x38,0x8c,0xf0,0x23,0x01,0x00,0x00,0x7b] + vinserti32x4 $0x7b, 291(%rax,%r14,8), %ymm28, %ymm17 + +// CHECK: vinserti32x4 $123, 2032(%rdx), %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x4a,0x7f,0x7b] + vinserti32x4 $0x7b, 2032(%rdx), %ymm28, %ymm17 + +// CHECK: vinserti32x4 $123, 2048(%rdx), %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x8a,0x00,0x08,0x00,0x00,0x7b] + vinserti32x4 $0x7b, 2048(%rdx), %ymm28, %ymm17 + +// CHECK: vinserti32x4 $123, -2048(%rdx), %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x4a,0x80,0x7b] + vinserti32x4 $0x7b, -2048(%rdx), %ymm28, %ymm17 + +// CHECK: vinserti32x4 $123, -2064(%rdx), %ymm28, %ymm17 +// CHECK: encoding: [0x62,0xe3,0x1d,0x20,0x38,0x8a,0xf0,0xf7,0xff,0xff,0x7b] + vinserti32x4 $0x7b, -2064(%rdx), %ymm28, %ymm17 + // CHECK: vextractf32x4 $171, %ymm17, %xmm28 // CHECK: encoding: [0x62,0x83,0x7d,0x28,0x19,0xcc,0xab] vextractf32x4 $0xab, %ymm17, %xmm28