Index: llvm/trunk/include/llvm/IR/IntrinsicsX86.td =================================================================== --- llvm/trunk/include/llvm/IR/IntrinsicsX86.td +++ llvm/trunk/include/llvm/IR/IntrinsicsX86.td @@ -4825,15 +4825,58 @@ GCCBuiltin<"__builtin_ia32_vbroadcastsd_pd512">, Intrinsic<[llvm_v8f64_ty], [llvm_v2f64_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastb_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastb128_mask">, + Intrinsic<[llvm_v16i8_ty], + [llvm_v16i8_ty, llvm_v16i8_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastb_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastb256_mask">, + Intrinsic<[llvm_v32i8_ty], + [llvm_v16i8_ty, llvm_v32i8_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastb_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastb512_mask">, + Intrinsic<[llvm_v64i8_ty], + [llvm_v16i8_ty, llvm_v64i8_ty, llvm_i64_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastw_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastw128_mask">, + Intrinsic<[llvm_v8i16_ty], + [llvm_v8i16_ty, llvm_v8i16_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastw_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastw256_mask">, + Intrinsic<[llvm_v16i16_ty], + [llvm_v8i16_ty, llvm_v16i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastw_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastw512_mask">, + Intrinsic<[llvm_v32i16_ty], + [llvm_v8i16_ty, llvm_v32i16_ty, llvm_i32_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastd_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastd128_mask">, + Intrinsic<[llvm_v4i32_ty], + [llvm_v4i32_ty, llvm_v4i32_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastd_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastd256_mask">, + Intrinsic<[llvm_v8i32_ty], + [llvm_v4i32_ty, llvm_v8i32_ty, llvm_i8_ty], [IntrNoMem]>; def int_x86_avx512_pbroadcastd_512 : - GCCBuiltin<"__builtin_ia32_pbroadcastd512">, - Intrinsic<[llvm_v16i32_ty], [llvm_v4i32_ty], [IntrNoMem]>; + GCCBuiltin<"__builtin_ia32_pbroadcastd512">, + Intrinsic<[llvm_v16i32_ty], + [llvm_v4i32_ty, llvm_v16i32_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastq_128 : + GCCBuiltin<"__builtin_ia32_pbroadcastq128_mask">, + Intrinsic<[llvm_v2i64_ty], + [llvm_v2i64_ty, llvm_v2i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastq_256 : + GCCBuiltin<"__builtin_ia32_pbroadcastq256_mask">, + Intrinsic<[llvm_v4i64_ty], + [llvm_v2i64_ty, llvm_v4i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastq_512 : + GCCBuiltin<"__builtin_ia32_pbroadcastq512">, + Intrinsic<[llvm_v8i64_ty], + [llvm_v2i64_ty, llvm_v8i64_ty, llvm_i8_ty], [IntrNoMem]>; + def int_x86_avx512_pbroadcastd_i32_512 : Intrinsic<[llvm_v16i32_ty], [llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_pbroadcastq_512 : - GCCBuiltin<"__builtin_ia32_pbroadcastq512">, - Intrinsic<[llvm_v8i64_ty], [llvm_v2i64_ty], [IntrNoMem]>; def int_x86_avx512_pbroadcastq_i64_512 : Intrinsic<[llvm_v8i64_ty], [llvm_i64_ty], [IntrNoMem]>; } Index: llvm/trunk/lib/Target/X86/X86InstrAVX512.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrAVX512.td +++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td @@ -807,46 +807,45 @@ //===---------------------------------------------------------------------===// // AVX-512 BROADCAST //--- -multiclass avx512_fp_broadcast opc, SDNode OpNode, RegisterClass SrcRC, - ValueType svt, X86VectorVTInfo _> { - defm r : AVX512_maskable, - T8PD, EVEX; - let mayLoad = 1 in { - defm m : AVX512_maskable, - T8PD, EVEX; - } +multiclass avx512_broadcast_rm opc, string OpcodeStr, + X86VectorVTInfo DestInfo, X86VectorVTInfo SrcInfo> { + + defm r : AVX512_maskable, + T8PD, EVEX; + let mayLoad = 1 in + defm m : AVX512_maskable, + T8PD, EVEX, EVEX_CD8; } -multiclass avx512_fp_broadcast_vl opc, SDNode OpNode, - AVX512VLVectorVTInfo _> { - defm Z : avx512_fp_broadcast, +multiclass avx512_fp_broadcast_vl opc, string OpcodeStr, + AVX512VLVectorVTInfo _> { + defm Z : avx512_broadcast_rm, EVEX_V512; let Predicates = [HasVLX] in { - defm Z256 : avx512_fp_broadcast, - EVEX_V256; + defm Z256 : avx512_broadcast_rm, + EVEX_V256; } } let ExeDomain = SSEPackedSingle in { - defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, X86VBroadcast, - avx512vl_f32_info>, EVEX_CD8<32, CD8VT1>; + defm VBROADCASTSS : avx512_fp_broadcast_vl<0x18, "vbroadcastss", + avx512vl_f32_info>; let Predicates = [HasVLX] in { - defm VBROADCASTSSZ128 : avx512_fp_broadcast<0x18, X86VBroadcast, VR128X, - v4f32, v4f32x_info>, EVEX_V128, - EVEX_CD8<32, CD8VT1>; + defm VBROADCASTSSZ128 : avx512_broadcast_rm<0x18, "vbroadcastss", + v4f32x_info, v4f32x_info>, EVEX_V128; } } let ExeDomain = SSEPackedDouble in { - defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, X86VBroadcast, - avx512vl_f64_info>, VEX_W, EVEX_CD8<64, CD8VT1>; + defm VBROADCASTSD : avx512_fp_broadcast_vl<0x19, "vbroadcastsd", + avx512vl_f64_info>, VEX_W; } // avx512_broadcast_pat introduces patterns for broadcast with a scalar argument. @@ -947,50 +946,41 @@ (bc_v8i64 (v16i32 immAllZerosV)), (i8 GR8:$mask))), (VPBROADCASTQrZrkz (COPY_TO_REGCLASS GR8:$mask, VK8WM), GR64:$src)>; -multiclass avx512_int_broadcast_rm opc, string OpcodeStr, - X86MemOperand x86memop, PatFrag ld_frag, - RegisterClass DstRC, ValueType OpVT, ValueType SrcVT, - RegisterClass KRC> { - def rr : AVX5128I, EVEX; - def rrk : AVX5128I, EVEX, EVEX_K; - def rrkz : AVX5128I, EVEX, EVEX_KZ; - let mayLoad = 1 in { - def rm : AVX5128I, EVEX; - def rmk : AVX5128I, EVEX, EVEX_K; - def rmkz : AVX5128I, EVEX, EVEX_KZ; +// Provide aliases for broadcast from the same register class that +// automatically does the extract. +multiclass avx512_int_broadcast_rm_lowering { + def : Pat<(DestInfo.VT (X86VBroadcast (SrcInfo.VT SrcInfo.RC:$src))), + (!cast(NAME#DestInfo.ZSuffix#"r") + (EXTRACT_SUBREG (SrcInfo.VT SrcInfo.RC:$src), sub_xmm))>; +} + +multiclass avx512_int_broadcast_rm_vl opc, string OpcodeStr, + AVX512VLVectorVTInfo _, Predicate prd> { + let Predicates = [prd] in { + defm Z : avx512_broadcast_rm, + avx512_int_broadcast_rm_lowering<_.info512, _.info256>, + EVEX_V512; + // Defined separately to avoid redefinition. + defm Z_Alt : avx512_int_broadcast_rm_lowering<_.info512, _.info512>; + } + let Predicates = [prd, HasVLX] in { + defm Z256 : avx512_broadcast_rm, + avx512_int_broadcast_rm_lowering<_.info256, _.info256>, + EVEX_V256; + defm Z128 : avx512_broadcast_rm, + EVEX_V128; } } -defm VPBROADCASTDZ : avx512_int_broadcast_rm<0x58, "vpbroadcastd", i32mem, - loadi32, VR512, v16i32, v4i32, VK16WM>, - EVEX_V512, EVEX_CD8<32, CD8VT1>; -defm VPBROADCASTQZ : avx512_int_broadcast_rm<0x59, "vpbroadcastq", i64mem, - loadi64, VR512, v8i64, v2i64, VK8WM>, EVEX_V512, VEX_W, - EVEX_CD8<64, CD8VT1>; +defm VPBROADCASTB : avx512_int_broadcast_rm_vl<0x78, "vpbroadcastb", + avx512vl_i8_info, HasBWI>; +defm VPBROADCASTW : avx512_int_broadcast_rm_vl<0x79, "vpbroadcastw", + avx512vl_i16_info, HasBWI>; +defm VPBROADCASTD : avx512_int_broadcast_rm_vl<0x58, "vpbroadcastd", + avx512vl_i32_info, HasAVX512>; +defm VPBROADCASTQ : avx512_int_broadcast_rm_vl<0x59, "vpbroadcastq", + avx512vl_i64_info, HasAVX512>, VEX_W; multiclass avx512_subvec_broadcast_rm opc, string OpcodeStr, X86VectorVTInfo _Dst, X86VectorVTInfo _Src> { @@ -1057,11 +1047,6 @@ EVEX_V512, EVEX_CD8<32, CD8VT8>; } -def : Pat<(v16i32 (int_x86_avx512_pbroadcastd_512 (v4i32 VR128X:$src))), - (VPBROADCASTDZrr VR128X:$src)>; -def : Pat<(v8i64 (int_x86_avx512_pbroadcastq_512 (v2i64 VR128X:$src))), - (VPBROADCASTQZrr VR128X:$src)>; - def : Pat<(v16f32 (X86VBroadcast (v16f32 VR512:$src))), (VBROADCASTSSZr (EXTRACT_SUBREG (v16f32 VR512:$src), sub_xmm))>; def : Pat<(v16f32 (X86VBroadcast (v8f32 VR256X:$src))), @@ -1072,16 +1057,6 @@ def : Pat<(v8f64 (X86VBroadcast (v4f64 VR256X:$src))), (VBROADCASTSDZr (EXTRACT_SUBREG (v4f64 VR256X:$src), sub_xmm))>; -def : Pat<(v16i32 (X86VBroadcast (v16i32 VR512:$src))), - (VPBROADCASTDZrr (EXTRACT_SUBREG (v16i32 VR512:$src), sub_xmm))>; -def : Pat<(v16i32 (X86VBroadcast (v8i32 VR256X:$src))), - (VPBROADCASTDZrr (EXTRACT_SUBREG (v8i32 VR256X:$src), sub_xmm))>; - -def : Pat<(v8i64 (X86VBroadcast (v8i64 VR512:$src))), - (VPBROADCASTQZrr (EXTRACT_SUBREG (v8i64 VR512:$src), sub_xmm))>; -def : Pat<(v8i64 (X86VBroadcast (v4i64 VR256X:$src))), - (VPBROADCASTQZrr (EXTRACT_SUBREG (v4i64 VR256X:$src), sub_xmm))>; - def : Pat<(v16f32 (int_x86_avx512_vbroadcast_ss_ps_512 (v4f32 VR128X:$src))), (VBROADCASTSSZr VR128X:$src)>; def : Pat<(v8f64 (int_x86_avx512_vbroadcast_sd_pd_512 (v2f64 VR128X:$src))), Index: llvm/trunk/lib/Target/X86/X86InstrSSE.td =================================================================== --- llvm/trunk/lib/Target/X86/X86InstrSSE.td +++ llvm/trunk/lib/Target/X86/X86InstrSSE.td @@ -8318,29 +8318,45 @@ // multiclass avx2_broadcast opc, string OpcodeStr, X86MemOperand x86memop, PatFrag ld_frag, - ValueType OpVT128, ValueType OpVT256> { - def rr : AVX28I { + let Predicates = [HasAVX2, prd] in { + def rr : AVX28I, + [(set VR128:$dst, + (OpVT128 (X86VBroadcast (OpVT128 VR128:$src))))]>, Sched<[WriteShuffle]>, VEX; - def rm : AVX28I, + [(set VR128:$dst, + (OpVT128 (X86VBroadcast (ld_frag addr:$src))))]>, Sched<[WriteLoad]>, VEX; - def Yrr : AVX28I, + [(set VR256:$dst, + (OpVT256 (X86VBroadcast (OpVT128 VR128:$src))))]>, Sched<[WriteShuffle256]>, VEX, VEX_L; - def Yrm : AVX28I, + [(set VR256:$dst, + (OpVT256 (X86VBroadcast (ld_frag addr:$src))))]>, Sched<[WriteLoad]>, VEX, VEX_L; + + // Provide aliases for broadcast from the same register class that + // automatically does the extract. + def : Pat<(OpVT256 (X86VBroadcast (OpVT256 VR256:$src))), + (!cast(NAME#"Yrr") + (OpVT128 (EXTRACT_SUBREG (OpVT256 VR256:$src),sub_xmm)))>; + } } -defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8, v16i8, v32i8>; -defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16, v8i16, v16i16>; -defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, v4i32, v8i32>; -defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, v2i64, v4i64>; +defm VPBROADCASTB : avx2_broadcast<0x78, "vpbroadcastb", i8mem, loadi8, + v16i8, v32i8, NoVLX_Or_NoBWI>; +defm VPBROADCASTW : avx2_broadcast<0x79, "vpbroadcastw", i16mem, loadi16, + v8i16, v16i16, NoVLX_Or_NoBWI>; +defm VPBROADCASTD : avx2_broadcast<0x58, "vpbroadcastd", i32mem, loadi32, + v4i32, v8i32, NoVLX>; +defm VPBROADCASTQ : avx2_broadcast<0x59, "vpbroadcastq", i64mem, loadi64, + v2i64, v4i64, NoVLX>; let Predicates = [HasAVX2] in { // loadi16 is tricky to fold, because !isTypeDesirableForOp, justifiably. @@ -8352,18 +8368,6 @@ // Provide aliases for broadcast from the same register class that // automatically does the extract. - def : Pat<(v32i8 (X86VBroadcast (v32i8 VR256:$src))), - (VPBROADCASTBYrr (v16i8 (EXTRACT_SUBREG (v32i8 VR256:$src), - sub_xmm)))>; - def : Pat<(v16i16 (X86VBroadcast (v16i16 VR256:$src))), - (VPBROADCASTWYrr (v8i16 (EXTRACT_SUBREG (v16i16 VR256:$src), - sub_xmm)))>; - def : Pat<(v8i32 (X86VBroadcast (v8i32 VR256:$src))), - (VPBROADCASTDYrr (v4i32 (EXTRACT_SUBREG (v8i32 VR256:$src), - sub_xmm)))>; - def : Pat<(v4i64 (X86VBroadcast (v4i64 VR256:$src))), - (VPBROADCASTQYrr (v2i64 (EXTRACT_SUBREG (v4i64 VR256:$src), - sub_xmm)))>; def : Pat<(v8f32 (X86VBroadcast (v8f32 VR256:$src))), (VBROADCASTSSYrr (v4f32 (EXTRACT_SUBREG (v8f32 VR256:$src), sub_xmm)))>; Index: llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h +++ llvm/trunk/lib/Target/X86/X86IntrinsicsInfo.h @@ -1564,6 +1564,30 @@ X86ISD::VPERMV3, 0), X86_INTRINSIC_DATA(avx512_maskz_vpermt2var_q_512, VPERM_3OP_MASKZ, X86ISD::VPERMV3, 0), + X86_INTRINSIC_DATA(avx512_pbroadcastb_128, INTR_TYPE_1OP_MASK, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_pbroadcastb_256, INTR_TYPE_1OP_MASK, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_pbroadcastb_512, INTR_TYPE_1OP_MASK, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_pbroadcastd_128, INTR_TYPE_1OP_MASK, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_pbroadcastd_256, INTR_TYPE_1OP_MASK, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_pbroadcastd_512, INTR_TYPE_1OP_MASK, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_pbroadcastq_128, INTR_TYPE_1OP_MASK, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_pbroadcastq_256, INTR_TYPE_1OP_MASK, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_pbroadcastq_512, INTR_TYPE_1OP_MASK, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_pbroadcastw_128, INTR_TYPE_1OP_MASK, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_pbroadcastw_256, INTR_TYPE_1OP_MASK, + X86ISD::VBROADCAST, 0), + X86_INTRINSIC_DATA(avx512_pbroadcastw_512, INTR_TYPE_1OP_MASK, + X86ISD::VBROADCAST, 0), X86_INTRINSIC_DATA(avx512_psad_bw_512, INTR_TYPE_2OP, X86ISD::PSADBW, 0), X86_INTRINSIC_DATA(avx512_psll_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSHLDQ, 0), X86_INTRINSIC_DATA(avx512_psrl_dq_512, INTR_TYPE_2OP_IMM8, X86ISD::VSRLDQ, 0), Index: llvm/trunk/test/CodeGen/X86/avx-isa-check.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx-isa-check.ll +++ llvm/trunk/test/CodeGen/X86/avx-isa-check.ll @@ -323,3 +323,24 @@ %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10 ret <16 x i8> %r2 } + +define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) { + %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle +} + +define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) { + %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> + ret <4 x i32> %shuffle +} + +define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) { + %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> + ret <16 x i8> %shuffle +} + +define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) { + %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> + ret <16 x i16> %shuffle +} + Index: llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512-intrinsics.ll @@ -365,12 +365,24 @@ } declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.pd.512(<2 x double>) nounwind readonly -define <16 x i32> @test_x86_pbroadcastd_512(<4 x i32> %a0) { - ; CHECK: vpbroadcastd - %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %a0) ; <<16 x i32>> [#uses=1] - ret <16 x i32> %res +define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) { +; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpbroadcastd %xmm0, %zmm1 {%k1} +; CHECK-NEXT: vpbroadcastd %xmm0, %zmm2 {%k1} {z} +; CHECK-NEXT: vpbroadcastd %xmm0, %zmm0 +; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpaddd %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq + %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1) + %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) + %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask) + %res3 = add <16 x i32> %res, %res1 + %res4 = add <16 x i32> %res2, %res3 + ret <16 x i32> %res4 } -declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>) nounwind readonly +declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16) define <16 x i32> @test_x86_pbroadcastd_i32_512(i32 %a0) { ; CHECK: vpbroadcastd @@ -379,12 +391,25 @@ } declare <16 x i32> @llvm.x86.avx512.pbroadcastd.i32.512(i32) nounwind readonly -define <8 x i64> @test_x86_pbroadcastq_512(<2 x i64> %a0) { - ; CHECK: vpbroadcastq - %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %a0) ; <<8 x i64>> [#uses=1] - ret <8 x i64> %res +define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) { +; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_512: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpbroadcastq %xmm0, %zmm1 {%k1} +; CHECK-NEXT: vpbroadcastq %xmm0, %zmm2 {%k1} {z} +; CHECK-NEXT: vpbroadcastq %xmm0, %zmm0 +; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0 +; CHECK-NEXT: vpaddq %zmm0, %zmm2, %zmm0 +; CHECK-NEXT: retq + %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1) + %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask) + %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask) + %res3 = add <8 x i64> %res, %res1 + %res4 = add <8 x i64> %res2, %res3 + ret <8 x i64> %res4 } -declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>) nounwind readonly +declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8) define <8 x i64> @test_x86_pbroadcastq_i64_512(i64 %a0) { ; CHECK: vpbroadcastq Index: llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -4276,3 +4276,124 @@ ret <16 x i16> %res4 } +declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32) + +define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) { +; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovd %edi, %k1 +; CHECK-NEXT: vpbroadcastb %xmm0, %ymm1 {%k1} +; CHECK-NEXT: vpbroadcastb %xmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpbroadcastb %xmm0, %ymm0 +; CHECK-NEXT: vpaddb %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpaddb %ymm0, %ymm2, %ymm0 +; CHECK-NEXT: retq + %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1) + %res1 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) + %res2 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, i32 %mask) + %res3 = add <32 x i8> %res, %res1 + %res4 = add <32 x i8> %res2, %res3 + ret <32 x i8> %res4 +} + +declare <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16) + +define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) { +; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_128: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpbroadcastb %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpbroadcastb %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpbroadcastb %xmm0, %xmm0 +; CHECK-NEXT: vpaddb %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpaddb %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: retq + %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1) + %res1 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) + %res2 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, i16 %mask) + %res3 = add <16 x i8> %res, %res1 + %res4 = add <16 x i8> %res2, %res3 + ret <16 x i8> %res4 +} + +declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16) + +define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) { +; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_256: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovw %edi, %k1 +; CHECK-NEXT: vpbroadcastw %xmm0, %ymm1 {%k1} +; CHECK-NEXT: vpbroadcastw %xmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpbroadcastw %xmm0, %ymm0 +; CHECK-NEXT: vpaddw %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpaddw %ymm0, %ymm2, %ymm0 +; CHECK-NEXT: retq + %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1) + %res1 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) + %res2 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> zeroinitializer, i16 %mask) + %res3 = add <16 x i16> %res, %res1 + %res4 = add <16 x i16> %res2, %res3 + ret <16 x i16> %res4 +} + +declare <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16>, <8 x i16>, i8) + +define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) { +; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpbroadcastw %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpbroadcastw %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpbroadcastw %xmm0, %xmm0 +; CHECK-NEXT: vpaddw %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpaddw %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: retq + %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1) + %res1 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) + %res2 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> zeroinitializer, i8 %mask) + %res3 = add <8 x i16> %res, %res1 + %res4 = add <8 x i16> %res2, %res3 + ret <8 x i16> %res4 +} + +declare <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8>, <64 x i8>, i64) + +define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) { +; CHECK-LABEL: test_int_x86_avx512_pbroadcastb_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovq %rdi, %k1 ## encoding: [0xc4,0xe1,0xfb,0x92,0xcf] +; CHECK-NEXT: vpbroadcastb %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8] +; CHECK-NEXT: vpbroadcastb %xmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xd0] +; CHECK-NEXT: vpbroadcastb %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x78,0xc0] +; CHECK-NEXT: vpaddb %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1] +; CHECK-NEXT: vpaddb %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1) + %res1 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) + %res2 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> zeroinitializer, i64 %mask) + %res3 = add <64 x i8> %res, %res1 + %res4 = add <64 x i8> %res2, %res3 + ret <64 x i8> %res4 +} + +declare <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16>, <32 x i16>, i32) + +define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) { +; CHECK-LABEL: test_int_x86_avx512_pbroadcastw_512: +; CHECK: ## BB#0: +; CHECK-NEXT: kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf] +; CHECK-NEXT: vpbroadcastw %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8] +; CHECK-NEXT: vpbroadcastw %xmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xd0] +; CHECK-NEXT: vpbroadcastw %xmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x79,0xc0] +; CHECK-NEXT: vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1] +; CHECK-NEXT: vpaddw %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0] +; CHECK-NEXT: retq ## encoding: [0xc3] + %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1) + %res1 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) + %res2 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> zeroinitializer, i32 %mask) + %res3 = add <32 x i16> %res, %res1 + %res4 = add <32 x i16> %res2, %res3 + ret <32 x i16> %res4 +} + Index: llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll +++ llvm/trunk/test/CodeGen/X86/avx512vl-intrinsics.ll @@ -5184,3 +5184,88 @@ %res2 = add <4 x i64> %res, %res1 ret <4 x i64> %res2 } + +declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8) + +define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) { +; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpbroadcastd %xmm0, %ymm1 {%k1} +; CHECK-NEXT: vpbroadcastd %xmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpbroadcastd %xmm0, %ymm0 +; CHECK-NEXT: vpaddd %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpaddd %ymm0, %ymm2, %ymm0 +; CHECK-NEXT: retq + %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 -1) + %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask) + %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask) + %res3 = add <8 x i32> %res, %res1 + %res4 = add <8 x i32> %res2, %res3 + ret <8 x i32> %res4 +} + +declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8) + +define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) { +; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpbroadcastd %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpbroadcastd %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpbroadcastd %xmm0, %xmm0 +; CHECK-NEXT: vpaddd %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpaddd %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: retq + %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1) + %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) + %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask) + %res3 = add <4 x i32> %res, %res1 + %res4 = add <4 x i32> %res2, %res3 + ret <4 x i32> %res4 +} + +declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8) + +define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) { +; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpbroadcastq %xmm0, %ymm1 {%k1} +; CHECK-NEXT: vpbroadcastq %xmm0, %ymm2 {%k1} {z} +; CHECK-NEXT: vpbroadcastq %xmm0, %ymm0 +; CHECK-NEXT: vpaddq %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: vpaddq %ymm0, %ymm2, %ymm0 +; CHECK-NEXT: retq + %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 -1) + %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1,i8 %mask) + %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer,i8 %mask) + %res3 = add <4 x i64> %res, %res1 + %res4 = add <4 x i64> %res2, %res3 + ret <4 x i64> %res4 +} + +declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8) + +define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) { +; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128: +; CHECK: ## BB#0: +; CHECK-NEXT: movzbl %dil, %eax +; CHECK-NEXT: kmovw %eax, %k1 +; CHECK-NEXT: vpbroadcastq %xmm0, %xmm1 {%k1} +; CHECK-NEXT: vpbroadcastq %xmm0, %xmm2 {%k1} {z} +; CHECK-NEXT: vpbroadcastq %xmm0, %xmm0 +; CHECK-NEXT: vpaddq %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: vpaddq %xmm0, %xmm2, %xmm0 +; CHECK-NEXT: retq + %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 -1) + %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1,i8 %mask) + %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer,i8 %mask) + %res3 = add <2 x i64> %res, %res1 + %res4 = add <2 x i64> %res2, %res3 + ret <2 x i64> %res4 +} + Index: llvm/trunk/test/MC/X86/avx512-encodings.s =================================================================== --- llvm/trunk/test/MC/X86/avx512-encodings.s +++ llvm/trunk/test/MC/X86/avx512-encodings.s @@ -17733,3 +17733,127 @@ // CHECK: encoding: [0x62,0x73,0xed,0x58,0x25,0xba,0xf8,0xfb,0xff,0xff,0x7b] vpternlogq $0x7b, -1032(%rdx){1to8}, %zmm2, %zmm15 +// CHECK: vpbroadcastd (%rcx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x11] + vpbroadcastd (%rcx), %zmm26 + +// CHECK: vpbroadcastd (%rcx), %zmm26 {%k2} +// CHECK: encoding: [0x62,0x62,0x7d,0x4a,0x58,0x11] + vpbroadcastd (%rcx), %zmm26 {%k2} + +// CHECK: vpbroadcastd (%rcx), %zmm26 {%k2} {z} +// CHECK: encoding: [0x62,0x62,0x7d,0xca,0x58,0x11] + vpbroadcastd (%rcx), %zmm26 {%k2} {z} + +// CHECK: vpbroadcastd 291(%rax,%r14,8), %zmm26 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x58,0x94,0xf0,0x23,0x01,0x00,0x00] + vpbroadcastd 291(%rax,%r14,8), %zmm26 + +// CHECK: vpbroadcastd 508(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x52,0x7f] + vpbroadcastd 508(%rdx), %zmm26 + +// CHECK: vpbroadcastd 512(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x92,0x00,0x02,0x00,0x00] + vpbroadcastd 512(%rdx), %zmm26 + +// CHECK: vpbroadcastd -512(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x52,0x80] + vpbroadcastd -512(%rdx), %zmm26 + +// CHECK: vpbroadcastd -516(%rdx), %zmm26 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x58,0x92,0xfc,0xfd,0xff,0xff] + vpbroadcastd -516(%rdx), %zmm26 + +// CHECK: vpbroadcastd %xmm22, %zmm10 +// CHECK: encoding: [0x62,0x32,0x7d,0x48,0x58,0xd6] + vpbroadcastd %xmm22, %zmm10 + +// CHECK: vpbroadcastd %xmm22, %zmm10 {%k7} +// CHECK: encoding: [0x62,0x32,0x7d,0x4f,0x58,0xd6] + vpbroadcastd %xmm22, %zmm10 {%k7} + +// CHECK: vpbroadcastd %xmm22, %zmm10 {%k7} {z} +// CHECK: encoding: [0x62,0x32,0x7d,0xcf,0x58,0xd6] + vpbroadcastd %xmm22, %zmm10 {%k7} {z} + +// CHECK: vpbroadcastd %eax, %zmm11 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x7c,0xd8] + vpbroadcastd %eax, %zmm11 + +// CHECK: vpbroadcastd %eax, %zmm11 {%k6} +// CHECK: encoding: [0x62,0x72,0x7d,0x4e,0x7c,0xd8] + vpbroadcastd %eax, %zmm11 {%k6} + +// CHECK: vpbroadcastd %eax, %zmm11 {%k6} {z} +// CHECK: encoding: [0x62,0x72,0x7d,0xce,0x7c,0xd8] + vpbroadcastd %eax, %zmm11 {%k6} {z} + +// CHECK: vpbroadcastd %ebp, %zmm11 +// CHECK: encoding: [0x62,0x72,0x7d,0x48,0x7c,0xdd] + vpbroadcastd %ebp, %zmm11 + +// CHECK: vpbroadcastd %r13d, %zmm11 +// CHECK: encoding: [0x62,0x52,0x7d,0x48,0x7c,0xdd] + vpbroadcastd %r13d, %zmm11 + +// CHECK: vpbroadcastq (%rcx), %zmm25 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x09] + vpbroadcastq (%rcx), %zmm25 + +// CHECK: vpbroadcastq (%rcx), %zmm25 {%k2} +// CHECK: encoding: [0x62,0x62,0xfd,0x4a,0x59,0x09] + vpbroadcastq (%rcx), %zmm25 {%k2} + +// CHECK: vpbroadcastq (%rcx), %zmm25 {%k2} {z} +// CHECK: encoding: [0x62,0x62,0xfd,0xca,0x59,0x09] + vpbroadcastq (%rcx), %zmm25 {%k2} {z} + +// CHECK: vpbroadcastq 291(%rax,%r14,8), %zmm25 +// CHECK: encoding: [0x62,0x22,0xfd,0x48,0x59,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpbroadcastq 291(%rax,%r14,8), %zmm25 + +// CHECK: vpbroadcastq 1016(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x4a,0x7f] + vpbroadcastq 1016(%rdx), %zmm25 + +// CHECK: vpbroadcastq 1024(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x8a,0x00,0x04,0x00,0x00] + vpbroadcastq 1024(%rdx), %zmm25 + +// CHECK: vpbroadcastq -1024(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x4a,0x80] + vpbroadcastq -1024(%rdx), %zmm25 + +// CHECK: vpbroadcastq -1032(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0xfd,0x48,0x59,0x8a,0xf8,0xfb,0xff,0xff] + vpbroadcastq -1032(%rdx), %zmm25 + +// CHECK: vpbroadcastq %xmm5, %zmm3 +// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x59,0xdd] + vpbroadcastq %xmm5, %zmm3 + +// CHECK: vpbroadcastq %xmm5, %zmm3 {%k5} +// CHECK: encoding: [0x62,0xf2,0xfd,0x4d,0x59,0xdd] + vpbroadcastq %xmm5, %zmm3 {%k5} + +// CHECK: vpbroadcastq %xmm5, %zmm3 {%k5} {z} +// CHECK: encoding: [0x62,0xf2,0xfd,0xcd,0x59,0xdd] + vpbroadcastq %xmm5, %zmm3 {%k5} {z} + +// CHECK: vpbroadcastq %rax, %zmm1 +// CHECK: encoding: [0x62,0xf2,0xfd,0x48,0x7c,0xc8] + vpbroadcastq %rax, %zmm1 + +// CHECK: vpbroadcastq %rax, %zmm1 {%k6} +// CHECK: encoding: [0x62,0xf2,0xfd,0x4e,0x7c,0xc8] + vpbroadcastq %rax, %zmm1 {%k6} + +// CHECK: vpbroadcastq %rax, %zmm1 {%k6} {z} +// CHECK: encoding: [0x62,0xf2,0xfd,0xce,0x7c,0xc8] + vpbroadcastq %rax, %zmm1 {%k6} {z} + +// CHECK: vpbroadcastq %r8, %zmm1 +// CHECK: encoding: [0x62,0xd2,0xfd,0x48,0x7c,0xc8] + vpbroadcastq %r8, %zmm1 + Index: llvm/trunk/test/MC/X86/x86-64-avx512bw.s =================================================================== --- llvm/trunk/test/MC/X86/x86-64-avx512bw.s +++ llvm/trunk/test/MC/X86/x86-64-avx512bw.s @@ -4595,3 +4595,99 @@ // CHECK: encoding: [0x62,0xe1,0x7e,0x48,0x70,0x92,0xc0,0xdf,0xff,0xff,0x7b] vpshufhw $123, -8256(%rdx), %zmm18 +// CHECK: vpbroadcastb %xmm23, %zmm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x78,0xcf] + vpbroadcastb %xmm23, %zmm25 + +// CHECK: vpbroadcastb %xmm23, %zmm25 {%k7} +// CHECK: encoding: [0x62,0x22,0x7d,0x4f,0x78,0xcf] + vpbroadcastb %xmm23, %zmm25 {%k7} + +// CHECK: vpbroadcastb %xmm23, %zmm25 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xcf,0x78,0xcf] + vpbroadcastb %xmm23, %zmm25 {%k7} {z} + +// CHECK: vpbroadcastb (%rcx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x09] + vpbroadcastb (%rcx), %zmm25 + +// CHECK: vpbroadcastb 291(%rax,%r14,8), %zmm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x78,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpbroadcastb 291(%rax,%r14,8), %zmm25 + +// CHECK: vpbroadcastb 127(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x4a,0x7f] + vpbroadcastb 127(%rdx), %zmm25 + +// CHECK: vpbroadcastb 128(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x8a,0x80,0x00,0x00,0x00] + vpbroadcastb 128(%rdx), %zmm25 + +// CHECK: vpbroadcastb -128(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x4a,0x80] + vpbroadcastb -128(%rdx), %zmm25 + +// CHECK: vpbroadcastb -129(%rdx), %zmm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x78,0x8a,0x7f,0xff,0xff,0xff] + vpbroadcastb -129(%rdx), %zmm25 + +// CHECK: vpbroadcastb %eax, %zmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x48,0x7a,0xd8] + vpbroadcastb %eax, %zmm19 + +// CHECK: vpbroadcastb %eax, %zmm19 {%k7} +// CHECK: encoding: [0x62,0xe2,0x7d,0x4f,0x7a,0xd8] + vpbroadcastb %eax, %zmm19 {%k7} + +// CHECK: vpbroadcastb %eax, %zmm19 {%k7} {z} +// CHECK: encoding: [0x62,0xe2,0x7d,0xcf,0x7a,0xd8] + vpbroadcastb %eax, %zmm19 {%k7} {z} + +// CHECK: vpbroadcastw %xmm19, %zmm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x79,0xf3] + vpbroadcastw %xmm19, %zmm30 + +// CHECK: vpbroadcastw %xmm19, %zmm30 {%k4} +// CHECK: encoding: [0x62,0x22,0x7d,0x4c,0x79,0xf3] + vpbroadcastw %xmm19, %zmm30 {%k4} + +// CHECK: vpbroadcastw %xmm19, %zmm30 {%k4} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xcc,0x79,0xf3] + vpbroadcastw %xmm19, %zmm30 {%k4} {z} + +// CHECK: vpbroadcastw (%rcx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0x31] + vpbroadcastw (%rcx), %zmm30 + +// CHECK: vpbroadcastw 291(%rax,%r14,8), %zmm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x48,0x79,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpbroadcastw 291(%rax,%r14,8), %zmm30 + +// CHECK: vpbroadcastw 254(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0x72,0x7f] + vpbroadcastw 254(%rdx), %zmm30 + +// CHECK: vpbroadcastw 256(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0xb2,0x00,0x01,0x00,0x00] + vpbroadcastw 256(%rdx), %zmm30 + +// CHECK: vpbroadcastw -256(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0x72,0x80] + vpbroadcastw -256(%rdx), %zmm30 + +// CHECK: vpbroadcastw -258(%rdx), %zmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x79,0xb2,0xfe,0xfe,0xff,0xff] + vpbroadcastw -258(%rdx), %zmm30 + +// CHECK: vpbroadcastw %eax, %zmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x48,0x7b,0xc0] + vpbroadcastw %eax, %zmm24 + +// CHECK: vpbroadcastw %eax, %zmm24 {%k1} +// CHECK: encoding: [0x62,0x62,0x7d,0x49,0x7b,0xc0] + vpbroadcastw %eax, %zmm24 {%k1} + +// CHECK: vpbroadcastw %eax, %zmm24 {%k1} {z} +// CHECK: encoding: [0x62,0x62,0x7d,0xc9,0x7b,0xc0] + vpbroadcastw %eax, %zmm24 {%k1} {z} + Index: llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s =================================================================== --- llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s +++ llvm/trunk/test/MC/X86/x86-64-avx512bw_vl.s @@ -9246,3 +9246,388 @@ // CHECK: vpsadbw -4128(%rdx), %ymm26, %ymm20 // CHECK: encoding: [0x62,0xe1,0x2d,0x20,0xf6,0xa2,0xe0,0xef,0xff,0xff] vpsadbw -4128(%rdx), %ymm26, %ymm20 + +// CHECK: vpbroadcastb %xmm28, %xmm30 +// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x78,0xf4] + vpbroadcastb %xmm28, %xmm30 + +// CHECK: vpbroadcastb %xmm28, %xmm30 {%k4} +// CHECK: encoding: [0x62,0x02,0x7d,0x0c,0x78,0xf4] + vpbroadcastb %xmm28, %xmm30 {%k4} + +// CHECK: vpbroadcastb %xmm28, %xmm30 {%k4} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0x8c,0x78,0xf4] + vpbroadcastb %xmm28, %xmm30 {%k4} {z} + +// CHECK: vpbroadcastb (%rcx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0x31] + vpbroadcastb (%rcx), %xmm30 + +// CHECK: vpbroadcastb 4660(%rax,%r14,8), %xmm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x78,0xb4,0xf0,0x34,0x12,0x00,0x00] + vpbroadcastb 4660(%rax,%r14,8), %xmm30 + +// CHECK: vpbroadcastb 127(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0x72,0x7f] + vpbroadcastb 127(%rdx), %xmm30 + +// CHECK: vpbroadcastb 128(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0xb2,0x80,0x00,0x00,0x00] + vpbroadcastb 128(%rdx), %xmm30 + +// CHECK: vpbroadcastb -128(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0x72,0x80] + vpbroadcastb -128(%rdx), %xmm30 + +// CHECK: vpbroadcastb -129(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x78,0xb2,0x7f,0xff,0xff,0xff] + vpbroadcastb -129(%rdx), %xmm30 + +// CHECK: vpbroadcastb %xmm25, %ymm17 +// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x78,0xc9] + vpbroadcastb %xmm25, %ymm17 + +// CHECK: vpbroadcastb %xmm25, %ymm17 {%k2} +// CHECK: encoding: [0x62,0x82,0x7d,0x2a,0x78,0xc9] + vpbroadcastb %xmm25, %ymm17 {%k2} + +// CHECK: vpbroadcastb %xmm25, %ymm17 {%k2} {z} +// CHECK: encoding: [0x62,0x82,0x7d,0xaa,0x78,0xc9] + vpbroadcastb %xmm25, %ymm17 {%k2} {z} + +// CHECK: vpbroadcastb (%rcx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x09] + vpbroadcastb (%rcx), %ymm17 + +// CHECK: vpbroadcastb 4660(%rax,%r14,8), %ymm17 +// CHECK: encoding: [0x62,0xa2,0x7d,0x28,0x78,0x8c,0xf0,0x34,0x12,0x00,0x00] + vpbroadcastb 4660(%rax,%r14,8), %ymm17 + +// CHECK: vpbroadcastb 127(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x4a,0x7f] + vpbroadcastb 127(%rdx), %ymm17 + +// CHECK: vpbroadcastb 128(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x8a,0x80,0x00,0x00,0x00] + vpbroadcastb 128(%rdx), %ymm17 + +// CHECK: vpbroadcastb -128(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x4a,0x80] + vpbroadcastb -128(%rdx), %ymm17 + +// CHECK: vpbroadcastb -129(%rdx), %ymm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x78,0x8a,0x7f,0xff,0xff,0xff] + vpbroadcastb -129(%rdx), %ymm17 + +// CHECK: vpbroadcastb %eax, %xmm20 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x7a,0xe0] + vpbroadcastb %eax, %xmm20 + +// CHECK: vpbroadcastb %eax, %xmm20 {%k3} +// CHECK: encoding: [0x62,0xe2,0x7d,0x0b,0x7a,0xe0] + vpbroadcastb %eax, %xmm20 {%k3} + +// CHECK: vpbroadcastb %eax, %xmm20 {%k3} {z} +// CHECK: encoding: [0x62,0xe2,0x7d,0x8b,0x7a,0xe0] + vpbroadcastb %eax, %xmm20 {%k3} {z} + +// CHECK: vpbroadcastb %eax, %ymm27 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x7a,0xd8] + vpbroadcastb %eax, %ymm27 + +// CHECK: vpbroadcastb %eax, %ymm27 {%k6} +// CHECK: encoding: [0x62,0x62,0x7d,0x2e,0x7a,0xd8] + vpbroadcastb %eax, %ymm27 {%k6} + +// CHECK: vpbroadcastb %eax, %ymm27 {%k6} {z} +// CHECK: encoding: [0x62,0x62,0x7d,0xae,0x7a,0xd8] + vpbroadcastb %eax, %ymm27 {%k6} {z} + +// CHECK: vpbroadcastw %xmm24, %xmm30 +// CHECK: encoding: [0x62,0x02,0x7d,0x08,0x79,0xf0] + vpbroadcastw %xmm24, %xmm30 + +// CHECK: vpbroadcastw %xmm24, %xmm30 {%k1} +// CHECK: encoding: [0x62,0x02,0x7d,0x09,0x79,0xf0] + vpbroadcastw %xmm24, %xmm30 {%k1} + +// CHECK: vpbroadcastw %xmm24, %xmm30 {%k1} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0x89,0x79,0xf0] + vpbroadcastw %xmm24, %xmm30 {%k1} {z} + +// CHECK: vpbroadcastw (%rcx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0x31] + vpbroadcastw (%rcx), %xmm30 + +// CHECK: vpbroadcastw 4660(%rax,%r14,8), %xmm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x79,0xb4,0xf0,0x34,0x12,0x00,0x00] + vpbroadcastw 4660(%rax,%r14,8), %xmm30 + +// CHECK: vpbroadcastw 254(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0x72,0x7f] + vpbroadcastw 254(%rdx), %xmm30 + +// CHECK: vpbroadcastw 256(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0xb2,0x00,0x01,0x00,0x00] + vpbroadcastw 256(%rdx), %xmm30 + +// CHECK: vpbroadcastw -256(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0x72,0x80] + vpbroadcastw -256(%rdx), %xmm30 + +// CHECK: vpbroadcastw -258(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x79,0xb2,0xfe,0xfe,0xff,0xff] + vpbroadcastw -258(%rdx), %xmm30 + +// CHECK: vpbroadcastw %xmm18, %ymm28 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x79,0xe2] + vpbroadcastw %xmm18, %ymm28 + +// CHECK: vpbroadcastw %xmm18, %ymm28 {%k3} +// CHECK: encoding: [0x62,0x22,0x7d,0x2b,0x79,0xe2] + vpbroadcastw %xmm18, %ymm28 {%k3} + +// CHECK: vpbroadcastw %xmm18, %ymm28 {%k3} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xab,0x79,0xe2] + vpbroadcastw %xmm18, %ymm28 {%k3} {z} + +// CHECK: vpbroadcastw (%rcx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x21] + vpbroadcastw (%rcx), %ymm28 + +// CHECK: vpbroadcastw 4660(%rax,%r14,8), %ymm28 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x79,0xa4,0xf0,0x34,0x12,0x00,0x00] + vpbroadcastw 4660(%rax,%r14,8), %ymm28 + +// CHECK: vpbroadcastw 254(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x62,0x7f] + vpbroadcastw 254(%rdx), %ymm28 + +// CHECK: vpbroadcastw 256(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0xa2,0x00,0x01,0x00,0x00] + vpbroadcastw 256(%rdx), %ymm28 + +// CHECK: vpbroadcastw -256(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x62,0x80] + vpbroadcastw -256(%rdx), %ymm28 + +// CHECK: vpbroadcastw -258(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0xa2,0xfe,0xfe,0xff,0xff] + vpbroadcastw -258(%rdx), %ymm28 + +// CHECK: vpbroadcastw %eax, %xmm24 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x7b,0xc0] + vpbroadcastw %eax, %xmm24 + +// CHECK: vpbroadcastw %eax, %xmm24 {%k6} +// CHECK: encoding: [0x62,0x62,0x7d,0x0e,0x7b,0xc0] + vpbroadcastw %eax, %xmm24 {%k6} + +// CHECK: vpbroadcastw %eax, %xmm24 {%k6} {z} +// CHECK: encoding: [0x62,0x62,0x7d,0x8e,0x7b,0xc0] + vpbroadcastw %eax, %xmm24 {%k6} {z} + +// CHECK: vpbroadcastw %eax, %ymm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x7b,0xd8] + vpbroadcastw %eax, %ymm19 + +// CHECK: vpbroadcastw %eax, %ymm19 {%k3} +// CHECK: encoding: [0x62,0xe2,0x7d,0x2b,0x7b,0xd8] + vpbroadcastw %eax, %ymm19 {%k3} + +// CHECK: vpbroadcastw %eax, %ymm19 {%k3} {z} +// CHECK: encoding: [0x62,0xe2,0x7d,0xab,0x7b,0xd8] + vpbroadcastw %eax, %ymm19 {%k3} {z} + +// CHECK: vpbroadcastb %xmm20, %xmm20 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x78,0xe4] + vpbroadcastb %xmm20, %xmm20 + +// CHECK: vpbroadcastb %xmm20, %xmm20 {%k7} +// CHECK: encoding: [0x62,0xa2,0x7d,0x0f,0x78,0xe4] + vpbroadcastb %xmm20, %xmm20 {%k7} + +// CHECK: vpbroadcastb %xmm20, %xmm20 {%k7} {z} +// CHECK: encoding: [0x62,0xa2,0x7d,0x8f,0x78,0xe4] + vpbroadcastb %xmm20, %xmm20 {%k7} {z} + +// CHECK: vpbroadcastb (%rcx), %xmm20 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0x21] + vpbroadcastb (%rcx), %xmm20 + +// CHECK: vpbroadcastb 291(%rax,%r14,8), %xmm20 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x78,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpbroadcastb 291(%rax,%r14,8), %xmm20 + +// CHECK: vpbroadcastb 127(%rdx), %xmm20 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0x62,0x7f] + vpbroadcastb 127(%rdx), %xmm20 + +// CHECK: vpbroadcastb 128(%rdx), %xmm20 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0xa2,0x80,0x00,0x00,0x00] + vpbroadcastb 128(%rdx), %xmm20 + +// CHECK: vpbroadcastb -128(%rdx), %xmm20 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0x62,0x80] + vpbroadcastb -128(%rdx), %xmm20 + +// CHECK: vpbroadcastb -129(%rdx), %xmm20 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x78,0xa2,0x7f,0xff,0xff,0xff] + vpbroadcastb -129(%rdx), %xmm20 + +// CHECK: vpbroadcastb %xmm27, %ymm30 +// CHECK: encoding: [0x62,0x02,0x7d,0x28,0x78,0xf3] + vpbroadcastb %xmm27, %ymm30 + +// CHECK: vpbroadcastb %xmm27, %ymm30 {%k6} +// CHECK: encoding: [0x62,0x02,0x7d,0x2e,0x78,0xf3] + vpbroadcastb %xmm27, %ymm30 {%k6} + +// CHECK: vpbroadcastb %xmm27, %ymm30 {%k6} {z} +// CHECK: encoding: [0x62,0x02,0x7d,0xae,0x78,0xf3] + vpbroadcastb %xmm27, %ymm30 {%k6} {z} + +// CHECK: vpbroadcastb (%rcx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0x31] + vpbroadcastb (%rcx), %ymm30 + +// CHECK: vpbroadcastb 291(%rax,%r14,8), %ymm30 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x78,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpbroadcastb 291(%rax,%r14,8), %ymm30 + +// CHECK: vpbroadcastb 127(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0x72,0x7f] + vpbroadcastb 127(%rdx), %ymm30 + +// CHECK: vpbroadcastb 128(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0xb2,0x80,0x00,0x00,0x00] + vpbroadcastb 128(%rdx), %ymm30 + +// CHECK: vpbroadcastb -128(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0x72,0x80] + vpbroadcastb -128(%rdx), %ymm30 + +// CHECK: vpbroadcastb -129(%rdx), %ymm30 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x78,0xb2,0x7f,0xff,0xff,0xff] + vpbroadcastb -129(%rdx), %ymm30 + +// CHECK: vpbroadcastb %eax, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x7a,0xf0] + vpbroadcastb %eax, %xmm22 + +// CHECK: vpbroadcastb %eax, %xmm22 {%k3} +// CHECK: encoding: [0x62,0xe2,0x7d,0x0b,0x7a,0xf0] + vpbroadcastb %eax, %xmm22 {%k3} + +// CHECK: vpbroadcastb %eax, %xmm22 {%k3} {z} +// CHECK: encoding: [0x62,0xe2,0x7d,0x8b,0x7a,0xf0] + vpbroadcastb %eax, %xmm22 {%k3} {z} + +// CHECK: vpbroadcastb %eax, %ymm17 +// CHECK: encoding: [0x62,0xe2,0x7d,0x28,0x7a,0xc8] + vpbroadcastb %eax, %ymm17 + +// CHECK: vpbroadcastb %eax, %ymm17 {%k1} +// CHECK: encoding: [0x62,0xe2,0x7d,0x29,0x7a,0xc8] + vpbroadcastb %eax, %ymm17 {%k1} + +// CHECK: vpbroadcastb %eax, %ymm17 {%k1} {z} +// CHECK: encoding: [0x62,0xe2,0x7d,0xa9,0x7a,0xc8] + vpbroadcastb %eax, %ymm17 {%k1} {z} + +// CHECK: vpbroadcastw %xmm20, %xmm19 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x79,0xdc] + vpbroadcastw %xmm20, %xmm19 + +// CHECK: vpbroadcastw %xmm20, %xmm19 {%k2} +// CHECK: encoding: [0x62,0xa2,0x7d,0x0a,0x79,0xdc] + vpbroadcastw %xmm20, %xmm19 {%k2} + +// CHECK: vpbroadcastw %xmm20, %xmm19 {%k2} {z} +// CHECK: encoding: [0x62,0xa2,0x7d,0x8a,0x79,0xdc] + vpbroadcastw %xmm20, %xmm19 {%k2} {z} + +// CHECK: vpbroadcastw (%rcx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x19] + vpbroadcastw (%rcx), %xmm19 + +// CHECK: vpbroadcastw 291(%rax,%r14,8), %xmm19 +// CHECK: encoding: [0x62,0xa2,0x7d,0x08,0x79,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpbroadcastw 291(%rax,%r14,8), %xmm19 + +// CHECK: vpbroadcastw 254(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x5a,0x7f] + vpbroadcastw 254(%rdx), %xmm19 + +// CHECK: vpbroadcastw 256(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x9a,0x00,0x01,0x00,0x00] + vpbroadcastw 256(%rdx), %xmm19 + +// CHECK: vpbroadcastw -256(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x5a,0x80] + vpbroadcastw -256(%rdx), %xmm19 + +// CHECK: vpbroadcastw -258(%rdx), %xmm19 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x79,0x9a,0xfe,0xfe,0xff,0xff] + vpbroadcastw -258(%rdx), %xmm19 + +// CHECK: vpbroadcastw %xmm17, %ymm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x79,0xc9] + vpbroadcastw %xmm17, %ymm25 + +// CHECK: vpbroadcastw %xmm17, %ymm25 {%k7} +// CHECK: encoding: [0x62,0x22,0x7d,0x2f,0x79,0xc9] + vpbroadcastw %xmm17, %ymm25 {%k7} + +// CHECK: vpbroadcastw %xmm17, %ymm25 {%k7} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0xaf,0x79,0xc9] + vpbroadcastw %xmm17, %ymm25 {%k7} {z} + +// CHECK: vpbroadcastw (%rcx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x09] + vpbroadcastw (%rcx), %ymm25 + +// CHECK: vpbroadcastw 291(%rax,%r14,8), %ymm25 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x79,0x8c,0xf0,0x23,0x01,0x00,0x00] + vpbroadcastw 291(%rax,%r14,8), %ymm25 + +// CHECK: vpbroadcastw 254(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x4a,0x7f] + vpbroadcastw 254(%rdx), %ymm25 + +// CHECK: vpbroadcastw 256(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x8a,0x00,0x01,0x00,0x00] + vpbroadcastw 256(%rdx), %ymm25 + +// CHECK: vpbroadcastw -256(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x4a,0x80] + vpbroadcastw -256(%rdx), %ymm25 + +// CHECK: vpbroadcastw -258(%rdx), %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x79,0x8a,0xfe,0xfe,0xff,0xff] + vpbroadcastw -258(%rdx), %ymm25 + +// CHECK: vpbroadcastw %eax, %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x7b,0xe8] + vpbroadcastw %eax, %xmm29 + +// CHECK: vpbroadcastw %eax, %xmm29 {%k1} +// CHECK: encoding: [0x62,0x62,0x7d,0x09,0x7b,0xe8] + vpbroadcastw %eax, %xmm29 {%k1} + +// CHECK: vpbroadcastw %eax, %xmm29 {%k1} {z} +// CHECK: encoding: [0x62,0x62,0x7d,0x89,0x7b,0xe8] + vpbroadcastw %eax, %xmm29 {%k1} {z} + +// CHECK: vpbroadcastw %eax, %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x7b,0xe0] + vpbroadcastw %eax, %ymm28 + +// CHECK: vpbroadcastw %eax, %ymm28 {%k4} +// CHECK: encoding: [0x62,0x62,0x7d,0x2c,0x7b,0xe0] + vpbroadcastw %eax, %ymm28 {%k4} + +// CHECK: vpbroadcastw %eax, %ymm28 {%k4} {z} +// CHECK: encoding: [0x62,0x62,0x7d,0xac,0x7b,0xe0] + vpbroadcastw %eax, %ymm28 {%k4} {z} + Index: llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s =================================================================== --- llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s +++ llvm/trunk/test/MC/X86/x86-64-avx512f_vl.s @@ -21563,3 +21563,251 @@ // CHECK: encoding: [0x62,0x63,0xc5,0x30,0x25,0x92,0xf8,0xfb,0xff,0xff,0x7b] vpternlogq $0x7b, -1032(%rdx){1to4}, %ymm23, %ymm26 +// CHECK: vpbroadcastd (%rcx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0x29] + vpbroadcastd (%rcx), %xmm29 + +// CHECK: vpbroadcastd (%rcx), %xmm29 {%k1} +// CHECK: encoding: [0x62,0x62,0x7d,0x09,0x58,0x29] + vpbroadcastd (%rcx), %xmm29 {%k1} + +// CHECK: vpbroadcastd (%rcx), %xmm29 {%k1} {z} +// CHECK: encoding: [0x62,0x62,0x7d,0x89,0x58,0x29] + vpbroadcastd (%rcx), %xmm29 {%k1} {z} + +// CHECK: vpbroadcastd 291(%rax,%r14,8), %xmm29 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x58,0xac,0xf0,0x23,0x01,0x00,0x00] + vpbroadcastd 291(%rax,%r14,8), %xmm29 + +// CHECK: vpbroadcastd 508(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0x6a,0x7f] + vpbroadcastd 508(%rdx), %xmm29 + +// CHECK: vpbroadcastd 512(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0xaa,0x00,0x02,0x00,0x00] + vpbroadcastd 512(%rdx), %xmm29 + +// CHECK: vpbroadcastd -512(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0x6a,0x80] + vpbroadcastd -512(%rdx), %xmm29 + +// CHECK: vpbroadcastd -516(%rdx), %xmm29 +// CHECK: encoding: [0x62,0x62,0x7d,0x08,0x58,0xaa,0xfc,0xfd,0xff,0xff] + vpbroadcastd -516(%rdx), %xmm29 + +// CHECK: vpbroadcastd (%rcx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0x21] + vpbroadcastd (%rcx), %ymm28 + +// CHECK: vpbroadcastd (%rcx), %ymm28 {%k2} +// CHECK: encoding: [0x62,0x62,0x7d,0x2a,0x58,0x21] + vpbroadcastd (%rcx), %ymm28 {%k2} + +// CHECK: vpbroadcastd (%rcx), %ymm28 {%k2} {z} +// CHECK: encoding: [0x62,0x62,0x7d,0xaa,0x58,0x21] + vpbroadcastd (%rcx), %ymm28 {%k2} {z} + +// CHECK: vpbroadcastd 291(%rax,%r14,8), %ymm28 +// CHECK: encoding: [0x62,0x22,0x7d,0x28,0x58,0xa4,0xf0,0x23,0x01,0x00,0x00] + vpbroadcastd 291(%rax,%r14,8), %ymm28 + +// CHECK: vpbroadcastd 508(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0x62,0x7f] + vpbroadcastd 508(%rdx), %ymm28 + +// CHECK: vpbroadcastd 512(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0xa2,0x00,0x02,0x00,0x00] + vpbroadcastd 512(%rdx), %ymm28 + +// CHECK: vpbroadcastd -512(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0x62,0x80] + vpbroadcastd -512(%rdx), %ymm28 + +// CHECK: vpbroadcastd -516(%rdx), %ymm28 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x58,0xa2,0xfc,0xfd,0xff,0xff] + vpbroadcastd -516(%rdx), %ymm28 + +// CHECK: vpbroadcastd %xmm18, %xmm29 +// CHECK: encoding: [0x62,0x22,0x7d,0x08,0x58,0xea] + vpbroadcastd %xmm18, %xmm29 + +// CHECK: vpbroadcastd %xmm18, %xmm29 {%k2} +// CHECK: encoding: [0x62,0x22,0x7d,0x0a,0x58,0xea] + vpbroadcastd %xmm18, %xmm29 {%k2} + +// CHECK: vpbroadcastd %xmm18, %xmm29 {%k2} {z} +// CHECK: encoding: [0x62,0x22,0x7d,0x8a,0x58,0xea] + vpbroadcastd %xmm18, %xmm29 {%k2} {z} + +// CHECK: vpbroadcastd %xmm26, %ymm17 +// CHECK: encoding: [0x62,0x82,0x7d,0x28,0x58,0xca] + vpbroadcastd %xmm26, %ymm17 + +// CHECK: vpbroadcastd %xmm26, %ymm17 {%k3} +// CHECK: encoding: [0x62,0x82,0x7d,0x2b,0x58,0xca] + vpbroadcastd %xmm26, %ymm17 {%k3} + +// CHECK: vpbroadcastd %xmm26, %ymm17 {%k3} {z} +// CHECK: encoding: [0x62,0x82,0x7d,0xab,0x58,0xca] + vpbroadcastd %xmm26, %ymm17 {%k3} {z} + +// CHECK: vpbroadcastd %eax, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x7c,0xf0] + vpbroadcastd %eax, %xmm22 + +// CHECK: vpbroadcastd %eax, %xmm22 {%k5} +// CHECK: encoding: [0x62,0xe2,0x7d,0x0d,0x7c,0xf0] + vpbroadcastd %eax, %xmm22 {%k5} + +// CHECK: vpbroadcastd %eax, %xmm22 {%k5} {z} +// CHECK: encoding: [0x62,0xe2,0x7d,0x8d,0x7c,0xf0] + vpbroadcastd %eax, %xmm22 {%k5} {z} + +// CHECK: vpbroadcastd %ebp, %xmm22 +// CHECK: encoding: [0x62,0xe2,0x7d,0x08,0x7c,0xf5] + vpbroadcastd %ebp, %xmm22 + +// CHECK: vpbroadcastd %r13d, %xmm22 +// CHECK: encoding: [0x62,0xc2,0x7d,0x08,0x7c,0xf5] + vpbroadcastd %r13d, %xmm22 + +// CHECK: vpbroadcastd %eax, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x7c,0xc8] + vpbroadcastd %eax, %ymm25 + +// CHECK: vpbroadcastd %eax, %ymm25 {%k5} +// CHECK: encoding: [0x62,0x62,0x7d,0x2d,0x7c,0xc8] + vpbroadcastd %eax, %ymm25 {%k5} + +// CHECK: vpbroadcastd %eax, %ymm25 {%k5} {z} +// CHECK: encoding: [0x62,0x62,0x7d,0xad,0x7c,0xc8] + vpbroadcastd %eax, %ymm25 {%k5} {z} + +// CHECK: vpbroadcastd %ebp, %ymm25 +// CHECK: encoding: [0x62,0x62,0x7d,0x28,0x7c,0xcd] + vpbroadcastd %ebp, %ymm25 + +// CHECK: vpbroadcastd %r13d, %ymm25 +// CHECK: encoding: [0x62,0x42,0x7d,0x28,0x7c,0xcd] + vpbroadcastd %r13d, %ymm25 + +// CHECK: vpbroadcastq (%rcx), %xmm30 +// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0x31] + vpbroadcastq (%rcx), %xmm30 + +// CHECK: vpbroadcastq (%rcx), %xmm30 {%k7} +// CHECK: encoding: [0x62,0x62,0xfd,0x0f,0x59,0x31] + vpbroadcastq (%rcx), %xmm30 {%k7} + +// CHECK: vpbroadcastq (%rcx), %xmm30 {%k7} {z} +// CHECK: encoding: [0x62,0x62,0xfd,0x8f,0x59,0x31] + vpbroadcastq (%rcx), %xmm30 {%k7} {z} + +// CHECK: vpbroadcastq 291(%rax,%r14,8), %xmm30 +// CHECK: encoding: [0x62,0x22,0xfd,0x08,0x59,0xb4,0xf0,0x23,0x01,0x00,0x00] + vpbroadcastq 291(%rax,%r14,8), %xmm30 + +// CHECK: vpbroadcastq 1016(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0x72,0x7f] + vpbroadcastq 1016(%rdx), %xmm30 + +// CHECK: vpbroadcastq 1024(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0xb2,0x00,0x04,0x00,0x00] + vpbroadcastq 1024(%rdx), %xmm30 + +// CHECK: vpbroadcastq -1024(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0x72,0x80] + vpbroadcastq -1024(%rdx), %xmm30 + +// CHECK: vpbroadcastq -1032(%rdx), %xmm30 +// CHECK: encoding: [0x62,0x62,0xfd,0x08,0x59,0xb2,0xf8,0xfb,0xff,0xff] + vpbroadcastq -1032(%rdx), %xmm30 + +// CHECK: vpbroadcastq (%rcx), %ymm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x19] + vpbroadcastq (%rcx), %ymm19 + +// CHECK: vpbroadcastq (%rcx), %ymm19 {%k7} +// CHECK: encoding: [0x62,0xe2,0xfd,0x2f,0x59,0x19] + vpbroadcastq (%rcx), %ymm19 {%k7} + +// CHECK: vpbroadcastq (%rcx), %ymm19 {%k7} {z} +// CHECK: encoding: [0x62,0xe2,0xfd,0xaf,0x59,0x19] + vpbroadcastq (%rcx), %ymm19 {%k7} {z} + +// CHECK: vpbroadcastq 291(%rax,%r14,8), %ymm19 +// CHECK: encoding: [0x62,0xa2,0xfd,0x28,0x59,0x9c,0xf0,0x23,0x01,0x00,0x00] + vpbroadcastq 291(%rax,%r14,8), %ymm19 + +// CHECK: vpbroadcastq 1016(%rdx), %ymm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x5a,0x7f] + vpbroadcastq 1016(%rdx), %ymm19 + +// CHECK: vpbroadcastq 1024(%rdx), %ymm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x9a,0x00,0x04,0x00,0x00] + vpbroadcastq 1024(%rdx), %ymm19 + +// CHECK: vpbroadcastq -1024(%rdx), %ymm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x5a,0x80] + vpbroadcastq -1024(%rdx), %ymm19 + +// CHECK: vpbroadcastq -1032(%rdx), %ymm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x59,0x9a,0xf8,0xfb,0xff,0xff] + vpbroadcastq -1032(%rdx), %ymm19 + +// CHECK: vpbroadcastq %xmm24, %xmm19 +// CHECK: encoding: [0x62,0x82,0xfd,0x08,0x59,0xd8] + vpbroadcastq %xmm24, %xmm19 + +// CHECK: vpbroadcastq %xmm24, %xmm19 {%k6} +// CHECK: encoding: [0x62,0x82,0xfd,0x0e,0x59,0xd8] + vpbroadcastq %xmm24, %xmm19 {%k6} + +// CHECK: vpbroadcastq %xmm24, %xmm19 {%k6} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0x8e,0x59,0xd8] + vpbroadcastq %xmm24, %xmm19 {%k6} {z} + +// CHECK: vpbroadcastq %xmm26, %ymm19 +// CHECK: encoding: [0x62,0x82,0xfd,0x28,0x59,0xda] + vpbroadcastq %xmm26, %ymm19 + +// CHECK: vpbroadcastq %xmm26, %ymm19 {%k6} +// CHECK: encoding: [0x62,0x82,0xfd,0x2e,0x59,0xda] + vpbroadcastq %xmm26, %ymm19 {%k6} + +// CHECK: vpbroadcastq %xmm26, %ymm19 {%k6} {z} +// CHECK: encoding: [0x62,0x82,0xfd,0xae,0x59,0xda] + vpbroadcastq %xmm26, %ymm19 {%k6} {z} + +// CHECK: vpbroadcastq %rax, %xmm22 +// CHECK: encoding: [0x62,0xe2,0xfd,0x08,0x7c,0xf0] + vpbroadcastq %rax, %xmm22 + +// CHECK: vpbroadcastq %rax, %xmm22 {%k2} +// CHECK: encoding: [0x62,0xe2,0xfd,0x0a,0x7c,0xf0] + vpbroadcastq %rax, %xmm22 {%k2} + +// CHECK: vpbroadcastq %rax, %xmm22 {%k2} {z} +// CHECK: encoding: [0x62,0xe2,0xfd,0x8a,0x7c,0xf0] + vpbroadcastq %rax, %xmm22 {%k2} {z} + +// CHECK: vpbroadcastq %r8, %xmm22 +// CHECK: encoding: [0x62,0xc2,0xfd,0x08,0x7c,0xf0] + vpbroadcastq %r8, %xmm22 + +// CHECK: vpbroadcastq %rax, %ymm19 +// CHECK: encoding: [0x62,0xe2,0xfd,0x28,0x7c,0xd8] + vpbroadcastq %rax, %ymm19 + +// CHECK: vpbroadcastq %rax, %ymm19 {%k5} +// CHECK: encoding: [0x62,0xe2,0xfd,0x2d,0x7c,0xd8] + vpbroadcastq %rax, %ymm19 {%k5} + +// CHECK: vpbroadcastq %rax, %ymm19 {%k5} {z} +// CHECK: encoding: [0x62,0xe2,0xfd,0xad,0x7c,0xd8] + vpbroadcastq %rax, %ymm19 {%k5} {z} + +// CHECK: vpbroadcastq %r8, %ymm19 +// CHECK: encoding: [0x62,0xc2,0xfd,0x28,0x7c,0xd8] + vpbroadcastq %r8, %ymm19 +