Index: lib/Target/X86/X86ISelLowering.h
===================================================================
--- lib/Target/X86/X86ISelLowering.h
+++ lib/Target/X86/X86ISelLowering.h
@@ -314,10 +314,8 @@
       // Vector shift elements
       VSHL, VSRL, VSRA,
 
-      // Vector variable shift right arithmetic.
-      // Unlike ISD::SRA, in case shift count greater then element size
-      // use sign bit to fill destination data element.
-      VSRAV,
+      // Vector variable shift
+      VSHLV, VSRLV, VSRAV,
 
       // Vector shift elements by immediate
       VSHLI, VSRLI, VSRAI,
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -27198,6 +27198,8 @@
   case X86ISD::VSHLI: return "X86ISD::VSHLI";
   case X86ISD::VSRLI: return "X86ISD::VSRLI";
   case X86ISD::VSRAI: return "X86ISD::VSRAI";
+  case X86ISD::VSHLV: return "X86ISD::VSHLV";
+  case X86ISD::VSRLV: return "X86ISD::VSRLV";
   case X86ISD::VSRAV: return "X86ISD::VSRAV";
   case X86ISD::VROTLI: return "X86ISD::VROTLI";
   case X86ISD::VROTRI: return "X86ISD::VROTRI";
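Note for reviewers: the reason these intrinsics get dedicated X86ISD opcodes is semantic. The AVX2/AVX-512 variable shifts (VPSLLV*/VPSRLV*) define an out-of-range shift count to produce 0 in that lane, while the generic ISD::SHL/ISD::SRL nodes treat such counts as undefined, so mapping the intrinsics onto the generic nodes (as the old X86IntrinsicsInfo.h entries below do) allowed the DAG combiner and constant folder to rewrite them incorrectly. A minimal scalar sketch of the rule the new nodes must preserve — illustration only, not code from this patch:

#include <cstdint>
#include <iostream>

// Per-lane rule for VPSLLVD/VPSRLVD: a count >= 32 yields 0; it is not
// undefined as it would be for a plain IR shift.
uint32_t vpsllvd_lane(uint32_t v, uint32_t cnt) {
  return cnt >= 32 ? 0u : v << cnt;
}
uint32_t vpsrlvd_lane(uint32_t v, uint32_t cnt) {
  return cnt >= 32 ? 0u : v >> cnt;
}

int main() {
  // 4 << 33 is poison for an LLVM IR 'shl', but the instruction returns 0.
  std::cout << vpsllvd_lane(4, 33) << '\n'; // 0
  std::cout << vpsrlvd_lane(4, 2) << '\n';  // 1
}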
Index: lib/Target/X86/X86InstrAVX512.td
===================================================================
--- lib/Target/X86/X86InstrAVX512.td
+++ lib/Target/X86/X86InstrAVX512.td
@@ -6445,52 +6445,53 @@
 defm : avx512_var_shift_lowering;
 
 // Special handling for VPSRAV intrinsics.
-multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _,
-                                         list<Predicate> p> {
+multiclass avx512_var_shift_int_lowering<string InstrStr, SDNode OpNode,
+                                         X86VectorVTInfo _, list<Predicate> p> {
   let Predicates = p in {
-    def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)),
+    def : Pat<(_.VT (OpNode _.RC:$src1, _.RC:$src2)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1, _.RC:$src2)>;
-    def : Pat<(_.VT (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2))),
+    def : Pat<(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rm) _.RC:$src1, addr:$src2)>;
     def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)),
+                     (OpNode _.RC:$src1, _.RC:$src2), _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, _.RC:$src2)>;
     def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
+                     (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                      _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0,
               _.KRC:$mask, _.RC:$src1, addr:$src2)>;
     def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
+                     (OpNode _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask,
               _.RC:$src1, _.RC:$src2)>;
     def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)),
+                     (OpNode _.RC:$src1, (_.LdFrag addr:$src2)),
                      _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask,
               _.RC:$src1, addr:$src2)>;
   }
 }
 
-multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _,
-                                            list<Predicate> p> :
-  avx512_var_shift_int_lowering<InstrStr, _, p> {
+multiclass avx512_var_shift_int_lowering_mb<string InstrStr, SDNode OpNode,
+                                            X86VectorVTInfo _, list<Predicate> p> :
+  avx512_var_shift_int_lowering<InstrStr, OpNode, _, p> {
   let Predicates = p in {
-    def : Pat<(_.VT (X86vsrav _.RC:$src1,
+    def : Pat<(_.VT (OpNode _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2)))),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmb) _.RC:$src1, addr:$src2)>;
     def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1,
+                     (OpNode _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                      _.RC:$src0)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0,
              _.KRC:$mask, _.RC:$src1, addr:$src2)>;
     def : Pat<(_.VT (vselect _.KRCWM:$mask,
-                     (X86vsrav _.RC:$src1,
+                     (OpNode _.RC:$src1,
                      (X86VBroadcast (_.ScalarLdFrag addr:$src2))),
                      _.ImmAllZerosV)),
              (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz)
               _.KRC:$mask,
@@ -6498,15 +6499,35 @@
   }
 }
 
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>;
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>;
-defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>;
-defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>;
+defm : avx512_var_shift_int_lowering<"VPSRAVW", X86vsrav, v8i16x_info, [HasVLX, HasBWI]>;
+defm : avx512_var_shift_int_lowering<"VPSRAVW", X86vsrav, v16i16x_info, [HasVLX, HasBWI]>;
+defm : avx512_var_shift_int_lowering<"VPSRAVW", X86vsrav, v32i16_info, [HasBWI]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", X86vsrav, v4i32x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", X86vsrav, v8i32x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", X86vsrav, v16i32_info, [HasAVX512]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", X86vsrav, v2i64x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", X86vsrav, v4i64x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", X86vsrav, v8i64_info, [HasAVX512]>;
+
+defm : avx512_var_shift_int_lowering<"VPSRLVW", X86vsrlv, v8i16x_info, [HasVLX, HasBWI]>;
+defm : avx512_var_shift_int_lowering<"VPSRLVW", X86vsrlv, v16i16x_info, [HasVLX, HasBWI]>;
+defm : avx512_var_shift_int_lowering<"VPSRLVW", X86vsrlv, v32i16_info, [HasBWI]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRLVD", X86vsrlv, v4i32x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRLVD", X86vsrlv, v8i32x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRLVD", X86vsrlv, v16i32_info, [HasAVX512]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRLVQ", X86vsrlv, v2i64x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRLVQ", X86vsrlv, v4i64x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSRLVQ", X86vsrlv, v8i64_info, [HasAVX512]>;
+
+defm : avx512_var_shift_int_lowering<"VPSLLVW", X86vshlv, v8i16x_info, [HasVLX, HasBWI]>;
+defm : avx512_var_shift_int_lowering<"VPSLLVW", X86vshlv, v16i16x_info, [HasVLX, HasBWI]>;
+defm : avx512_var_shift_int_lowering<"VPSLLVW", X86vshlv, v32i16_info, [HasBWI]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSLLVD", X86vshlv, v4i32x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSLLVD", X86vshlv, v8i32x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSLLVD", X86vshlv, v16i32_info, [HasAVX512]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSLLVQ", X86vshlv, v2i64x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSLLVQ", X86vshlv, v4i64x_info, [HasVLX]>;
+defm : avx512_var_shift_int_lowering_mb<"VPSLLVQ", X86vshlv, v8i64_info, [HasAVX512]>;
 
 // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX.
 let Predicates = [HasAVX512, NoVLX] in {
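The masked pattern forms in the multiclass above (rrk/rmk/rmbk and rrkz/rmkz/rmbkz) match a vselect of the shift against either the pass-through source or an all-zeros vector. A scalar sketch of that merge- and zero-masking behavior, assuming standard AVX-512 write-mask semantics (illustrative names, not patch code):

#include <array>
#include <cstdint>
#include <iostream>

using V4 = std::array<uint32_t, 4>;

// Unmasked variable shift, with the x86 out-of-range-count-gives-0 rule.
V4 vpsllvd(V4 v, V4 c) {
  V4 r{};
  for (int i = 0; i < 4; ++i)
    r[i] = c[i] >= 32 ? 0u : v[i] << c[i];
  return r;
}

// Merge-masking ("rrk"): lanes with a clear mask bit keep src0.
V4 vpsllvd_mask(V4 src0, uint8_t k, V4 v, V4 c) {
  V4 s = vpsllvd(v, c);
  for (int i = 0; i < 4; ++i)
    if (!((k >> i) & 1))
      s[i] = src0[i];
  return s;
}

// Zero-masking ("rrkz"): lanes with a clear mask bit become 0.
V4 vpsllvd_maskz(uint8_t k, V4 v, V4 c) {
  return vpsllvd_mask(V4{}, k, v, c);
}

int main() {
  // Lane 2 has an out-of-range count (40) but a set mask bit, so it is 0;
  // lanes 1 and 3 are masked off and keep the pass-through value 9.
  V4 r = vpsllvd_mask({9, 9, 9, 9}, 0b0101, {1, 1, 1, 1}, {2, 2, 40, 40});
  std::cout << r[0] << ' ' << r[1] << ' ' << r[2] << ' ' << r[3] << '\n'; // 4 9 0 9
}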
Index: lib/Target/X86/X86InstrFragmentsSIMD.td
===================================================================
--- lib/Target/X86/X86InstrFragmentsSIMD.td
+++ lib/Target/X86/X86InstrFragmentsSIMD.td
@@ -198,6 +198,8 @@
 def X86vshiftvariable : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>,
                                              SDTCisSameAs<0,2>, SDTCisInt<0>]>;
 
+def X86vshlv : SDNode<"X86ISD::VSHLV", X86vshiftvariable>;
+def X86vsrlv : SDNode<"X86ISD::VSRLV", X86vshiftvariable>;
 def X86vsrav : SDNode<"X86ISD::VSRAV", X86vshiftvariable>;
 
 def X86vshli : SDNode<"X86ISD::VSHLI", X86vshiftimm>;
Index: lib/Target/X86/X86InstrSSE.td
===================================================================
--- lib/Target/X86/X86InstrSSE.td
+++ lib/Target/X86/X86InstrSSE.td
@@ -8356,6 +8356,42 @@
   defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W;
   defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>;
 
+  def : Pat<(v4i32 (X86vshlv VR128:$src1, VR128:$src2)),
+            (VPSLLVDrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v4i32 (X86vshlv VR128:$src1, (load addr:$src2))),
+            (VPSLLVDrm VR128:$src1, addr:$src2)>;
+  def : Pat<(v8i32 (X86vshlv VR256:$src1, VR256:$src2)),
+            (VPSLLVDYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v8i32 (X86vshlv VR256:$src1, (load addr:$src2))),
+            (VPSLLVDYrm VR256:$src1, addr:$src2)>;
+
+  def : Pat<(v2i64 (X86vshlv VR128:$src1, VR128:$src2)),
+            (VPSLLVQrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v2i64 (X86vshlv VR128:$src1, (load addr:$src2))),
+            (VPSLLVQrm VR128:$src1, addr:$src2)>;
+  def : Pat<(v4i64 (X86vshlv VR256:$src1, VR256:$src2)),
+            (VPSLLVQYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v4i64 (X86vshlv VR256:$src1, (load addr:$src2))),
+            (VPSLLVQYrm VR256:$src1, addr:$src2)>;
+
+  def : Pat<(v4i32 (X86vsrlv VR128:$src1, VR128:$src2)),
+            (VPSRLVDrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v4i32 (X86vsrlv VR128:$src1, (load addr:$src2))),
+            (VPSRLVDrm VR128:$src1, addr:$src2)>;
+  def : Pat<(v8i32 (X86vsrlv VR256:$src1, VR256:$src2)),
+            (VPSRLVDYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v8i32 (X86vsrlv VR256:$src1, (load addr:$src2))),
+            (VPSRLVDYrm VR256:$src1, addr:$src2)>;
+
+  def : Pat<(v2i64 (X86vsrlv VR128:$src1, VR128:$src2)),
+            (VPSRLVQrr VR128:$src1, VR128:$src2)>;
+  def : Pat<(v2i64 (X86vsrlv VR128:$src1, (load addr:$src2))),
+            (VPSRLVQrm VR128:$src1, addr:$src2)>;
+  def : Pat<(v4i64 (X86vsrlv VR256:$src1, VR256:$src2)),
+            (VPSRLVQYrr VR256:$src1, VR256:$src2)>;
+  def : Pat<(v4i64 (X86vsrlv VR256:$src1, (load addr:$src2))),
+            (VPSRLVQYrm VR256:$src1, addr:$src2)>;
+
   def : Pat<(v4i32 (X86vsrav VR128:$src1, VR128:$src2)),
             (VPSRAVDrr VR128:$src1, VR128:$src2)>;
   def : Pat<(v4i32 (X86vsrav VR128:$src1, (load addr:$src2))),
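These VEX patterns give the AVX2 forms the same treatment; since the AVX2 instructions have no masked variants, plain def : Pat entries for the register and load forms suffice. The behavior being preserved is directly observable through the corresponding Intel intrinsics; a small usage sketch (assumes an AVX2-capable host, compile with -mavx2 — not part of the patch):

#include <immintrin.h>
#include <cstdio>

int main() {
  // Counts 1 and 2 are in range; 33 and -2 (a huge unsigned count) are not,
  // and must produce 0 in those lanes per the VPSLLVD/VPSRLVD definition.
  __m128i v = _mm_setr_epi32(2, 9, 4, -1);
  __m128i c = _mm_setr_epi32(1, 33, 2, -2);
  alignas(16) int shl[4], srl[4];
  _mm_store_si128((__m128i *)shl, _mm_sllv_epi32(v, c));
  _mm_store_si128((__m128i *)srl, _mm_srlv_epi32(v, c));
  printf("%d %d %d %d\n", shl[0], shl[1], shl[2], shl[3]); // 4 0 16 0
  printf("%d %d %d %d\n", srl[0], srl[1], srl[2], srl[3]); // 1 0 1 0
}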
Index: lib/Target/X86/X86IntrinsicsInfo.h
===================================================================
--- lib/Target/X86/X86IntrinsicsInfo.h
+++ lib/Target/X86/X86IntrinsicsInfo.h
@@ -364,10 +364,10 @@
   X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0),
-  X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0),
+  X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
   X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0),
   X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0),
   X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0),
@@ -380,10 +380,10 @@
   X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0),
-  X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0),
+  X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
   X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
   X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND),
   X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0),
@@ -918,11 +918,11 @@
   X86_INTRINSIC_DATA(avx512_pslli_d_512, VSHIFT, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx512_pslli_q_512, VSHIFT, X86ISD::VSHLI, 0),
   X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, ISD::SHL, 0),
-  X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, ISD::SHL, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
+  X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0),
   X86_INTRINSIC_DATA(avx512_psra_d_512, INTR_TYPE_2OP, X86ISD::VSRA, 0),
   X86_INTRINSIC_DATA(avx512_psra_q_128, INTR_TYPE_2OP, X86ISD::VSRA, 0),
   X86_INTRINSIC_DATA(avx512_psra_q_256, INTR_TYPE_2OP, X86ISD::VSRA, 0),
@@ -946,11 +946,11 @@
   X86_INTRINSIC_DATA(avx512_psrli_d_512, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx512_psrli_q_512, VSHIFT, X86ISD::VSRLI, 0),
   X86_INTRINSIC_DATA(avx512_psrli_w_512, VSHIFT, X86ISD::VSRLI, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0),
-  X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
+  X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0),
   X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
   X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
   X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0),
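Repointing these table entries from ISD::SHL/ISD::SRL to the new nodes is the functional fix: INTR_TYPE_2OP lowering rebuilds the intrinsic as the listed node with the same two operands, and with the generic opcodes the constant folder applied the IR rule that a shift count >= the bit width is undefined. That is where the pre-folded splats and the <...,u,...> (undef) lanes in the old CHECK lines below came from. A side-by-side sketch of the two per-lane folding rules, using hypothetical helper names:

#include <cstdint>
#include <optional>

// Generic ISD::SHL lane fold: no defined result for an oversized count, so
// the folder may emit undef (modeled here as nullopt) — the 'u' lanes in the
// old CHECK lines.
std::optional<uint32_t> fold_isd_shl(uint32_t v, uint32_t c) {
  if (c >= 32)
    return std::nullopt;
  return v << c;
}

// X86ISD::VSHLV lane fold: an oversized count is well defined and gives 0,
// so the intrinsic must not be folded under the generic rule.
uint32_t fold_x86_vshlv(uint32_t v, uint32_t c) {
  return c >= 32 ? 0u : v << c;
}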
Index: test/CodeGen/X86/avx2-intrinsics-x86.ll
===================================================================
--- test/CodeGen/X86/avx2-intrinsics-x86.ll
+++ test/CodeGen/X86/avx2-intrinsics-x86.ll
@@ -1183,38 +1183,58 @@
 define <4 x i32> @test_x86_avx2_psllv_d_const() {
 ; X86-AVX-LABEL: test_x86_avx2_psllv_d_const:
 ; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
-; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A]
+; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
+; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT: vpsllvd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
 ; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A]
+; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,4294967295]
+; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
 ; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9]
+; X86-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X86-AVX-NEXT: retl # encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_const:
 ; X86-AVX512VL: # %bb.0:
-; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = <4,9,0,u>
+; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
 ; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
 ; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX512VL-NEXT: vpaddd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
-; X86-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vpsllvd {{\.LCPI.*}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
+; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression xmm1 = [1,1,1,4294967295]
+; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9]
+; X86-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psllv_d_const:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
-; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A]
+; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
+; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
 ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A]
+; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [1,1,1,4294967295]
+; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
 ; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x47,0xc9]
+; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X64-AVX-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_const:
 ; X64-AVX512VL: # %bb.0:
-; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = <4,9,0,u>
+; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
 ; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
 ; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression xmm1 = [1,1,1,4294967295]
+; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpsllvd %xmm1, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x47,0xc9]
+; X64-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
 %res0 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> )
 %res1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> )
@@ -1241,38 +1261,62 @@
 define <8 x i32> @test_x86_avx2_psllv_d_256_const() {
 ; X86-AVX-LABEL: test_x86_avx2_psllv_d_256_const:
 ; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8,8,8,8,8,8,8,8]
-; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A]
+; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
+; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT: vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
 ; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX-NEXT: vpaddd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A]
+; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
 ; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT: vpsllvd {{\.LCPI.*}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
+; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X86-AVX-NEXT: retl # encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const:
 ; X86-AVX512VL: # %bb.0:
-; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = <4,9,0,u,12,7,u,0>
+; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
 ; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
 ; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX512VL-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
-; X86-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vpsllvd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
+; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vpsllvd {{\.LCPI.*}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psllv_d_256_const:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpbroadcastd {{.*#+}} ymm0 = [8,8,8,8,8,8,8,8]
-; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A]
+; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
+; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
 ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A]
+; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
 ; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpsllvd {{.*}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
+; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X64-AVX-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const:
 ; X64-AVX512VL: # %bb.0:
-; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = <4,9,0,u,12,7,u,0>
+; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
 ; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
 ; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpsllvd {{.*}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x47,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
 %res0 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> )
 %res1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> )
@@ -1316,14 +1360,20 @@
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psllv_q_const:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: movl $8, %eax # encoding: [0xb8,0x08,0x00,0x00,0x00]
-; X64-AVX-NEXT: vmovq %rax, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc0]
+; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4,18446744073709551615]
+; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
+; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_const:
 ; X64-AVX512VL: # %bb.0:
-; X64-AVX512VL-NEXT: movl $8, %eax # encoding: [0xb8,0x08,0x00,0x00,0x00]
-; X64-AVX512VL-NEXT: vmovq %rax, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc0]
+; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,18446744073709551615]
+; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> , <2 x i64> )
 ret <2 x i64> %res
@@ -1366,15 +1416,19 @@
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psllv_q_256_const:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [8,8,8,8]
-; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A]
+; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [4,4,4,18446744073709551615]
+; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
 ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_256_const:
 ; X64-AVX512VL: # %bb.0:
-; X64-AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [8,8,8,8]
-; X64-AVX512VL-NEXT: # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,18446744073709551615]
+; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpsllvq {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A]
 ; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> , <4 x i64> )
@@ -1400,38 +1454,62 @@
 define <4 x i32> @test_x86_avx2_psrlv_d_const() {
 ; X86-AVX-LABEL: test_x86_avx2_psrlv_d_const:
 ;
 ; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
-; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A]
+; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
+; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
 ; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX-NEXT: vpaddd {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A]
+; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4294967295]
+; X86-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
 ; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
+; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X86-AVX-NEXT: retl # encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const:
 ; X86-AVX512VL: # %bb.0:
-; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = <1,9,0,u>
+; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
 ; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
 ; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX512VL-NEXT: vpaddd {{\.LCPI.*}}{1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
-; X86-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
+; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %xmm1 # EVEX TO VEX Compression xmm1 = [4,4,4,4294967295]
+; X86-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psrlv_d_const:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpbroadcastd {{.*#+}} xmm0 = [2,2,2,2]
-; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x79,0x58,0x05,A,A,A,A]
+; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,0,4294967295]
+; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
 ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX-NEXT: vpaddd {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0x05,A,A,A,A]
+; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [4,4,4,4294967295]
+; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
 ; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 # encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
+; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X64-AVX-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const:
 ; X64-AVX512VL: # %bb.0:
-; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = <1,9,0,u>
+; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,9,0,4294967295]
 ; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
 ; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm1 # EVEX TO VEX Compression xmm1 = [4,4,4,4294967295]
+; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0x45,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
 %res0 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> )
 %res1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> )
@@ -1458,38 +1536,62 @@
 define <8 x i32> @test_x86_avx2_psrlv_d_256_const() {
 ; X86-AVX-LABEL: test_x86_avx2_psrlv_d_256_const:
 ; X86-AVX: # %bb.0:
-; X86-AVX-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
-; X86-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A]
+; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
+; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
 ; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX-NEXT: vpaddd {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A]
+; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X86-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
 ; X86-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
+; X86-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X86-AVX-NEXT: retl # encoding: [0xc3]
 ;
 ; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const:
 ; X86-AVX512VL: # %bb.0:
-; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = <1,9,0,u,0,7,u,0>
+; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
 ; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
 ; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
-; X86-AVX512VL-NEXT: vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm0 #
-; X86-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
+; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vmovdqa {{\.LCPI.*}}, %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X86-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vpsrlvd {{\.LCPI.*}}, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
+; X86-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X86-AVX512VL-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psrlv_d_256_const:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vpbroadcastd {{.*#+}} ymm0 = [2,2,2,2,2,2,2,2]
-; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x58,0x05,A,A,A,A]
+; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
+; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
 ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX-NEXT: vpaddd {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0x05,A,A,A,A]
+; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X64-AVX-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
 ; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1 # encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
+; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X64-AVX-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const:
 ; X64-AVX512VL: # %bb.0:
-; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = <1,9,0,u,0,7,u,0>
+; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,0,4294967295,3,7,4294967295,0]
 ; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
 ; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
-; X64-AVX512VL-NEXT: vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x05,A,A,A,A]
-; X64-AVX512VL-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm1 # EVEX TO VEX Compression ymm1 = [4,4,4,4,4,4,4,4294967295]
+; X64-AVX512VL-NEXT: # encoding: [0xc5,0xfd,0x6f,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpsrlvd {{.*}}(%rip), %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x45,0x0d,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
 %res0 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> )
 %res1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> )
@@ -1534,14 +1636,20 @@
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psrlv_q_const:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
-; X64-AVX-NEXT: vmovq %rax, %xmm0 # encoding: [0xc4,0xe1,0xf9,0x6e,0xc0]
+; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [4,4]
+; X64-AVX-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-AVX-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
+; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_const:
 ; X64-AVX512VL: # %bb.0:
-; X64-AVX512VL-NEXT: movl $2, %eax # encoding: [0xb8,0x02,0x00,0x00,0x00]
-; X64-AVX512VL-NEXT: vmovq %rax, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe1,0xf9,0x6e,0xc0]
+; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4]
+; X64-AVX512VL-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
 %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> , <2 x i64> )
 ret <2 x i64> %res
@@ -1585,15 +1693,19 @@
 ;
 ; X64-AVX-LABEL: test_x86_avx2_psrlv_q_256_const:
 ; X64-AVX: # %bb.0:
-; X64-AVX-NEXT: vbroadcastsd {{.*#+}} ymm0 = [2,2,2,2]
-; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A]
+; X64-AVX-NEXT: vpbroadcastq {{.*#+}} ymm0 = [4,4,4,4]
+; X64-AVX-NEXT: # encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A]
+; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
 ; X64-AVX-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX-NEXT: retq # encoding: [0xc3]
 ;
 ; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256_const:
 ; X64-AVX512VL: # %bb.0:
-; X64-AVX512VL-NEXT: vbroadcastsd {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,2,2,2]
-; X64-AVX512VL-NEXT: # encoding: [0xc4,0xe2,0x7d,0x19,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4]
+; X64-AVX512VL-NEXT: # encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A]
+; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-AVX512VL-NEXT: vpsrlvq {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A]
 ; X64-AVX512VL-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-AVX512VL-NEXT: retq # encoding: [0xc3]
 %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> , <4 x i64> )
Index: test/CodeGen/X86/avx512-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512-intrinsics.ll
+++ test/CodeGen/X86/avx512-intrinsics.ll
@@ -5229,8 +5229,11 @@
 define <16 x i32> @test_x86_avx512_psllv_d_512_const() {
 ; CHECK-LABEL: test_x86_avx512_psllv_d_512_const:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = <4,9,0,u,12,7,u,0,32,5,u,0,80,3,u,0>
-; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,0,4294967295,3,7,4294967295,0,4,5,4294967294,0,5,3,4294967293,0]
+; CHECK-NEXT: vpsllvd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4294967295]
+; CHECK-NEXT: vpsllvd {{.*}}(%rip), %zmm1, %zmm1
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT: retq
 %res0 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> , <16 x i32> )
 %res1 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> , <16 x i32> )
@@ -5277,8 +5280,11 @@
 define <8 x i64> @test_x86_avx512_psllv_q_512_const() {
 ; CHECK-LABEL: test_x86_avx512_psllv_q_512_const:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = <4,9,0,u,12,7,18446744056529682432,0>
-; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,0,18446744073709551615,3,7,18446744073709551615,0]
+; CHECK-NEXT: vpsllvq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,18446744073709551615]
+; CHECK-NEXT: vpsllvq {{.*}}(%rip), %zmm1, %zmm1
+; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT: retq
 %res0 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> , <8 x i64> )
 %res1 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> , <8 x i64> )
@@ -5397,8 +5403,11 @@
 define <16 x i32> @test_x86_avx512_psrlv_d_512_const() {
 ; CHECK-LABEL: test_x86_avx512_psrlv_d_512_const:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = <1,9,0,u,0,7,u,0,0,5,u,0,0,3,u,0>
-; CHECK-NEXT: vpaddd {{.*}}(%rip){1to16}, %zmm0, %zmm0
+; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,0,4294967295,3,7,4294967295,0,4,5,4294967294,0,5,3,4294967293,0]
+; CHECK-NEXT: vpsrlvd {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4294967295]
+; CHECK-NEXT: vpsrlvd {{.*}}(%rip), %zmm1, %zmm1
+; CHECK-NEXT: vpaddd %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT: retq
 %res0 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> , <16 x i32> )
 %res1 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> , <16 x i32> )
@@ -5445,8 +5454,11 @@
 define <8 x i64> @test_x86_avx512_psrlv_q_512_const() {
 ; CHECK-LABEL: test_x86_avx512_psrlv_q_512_const:
 ; CHECK: ## %bb.0:
-; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = <1,9,0,u,0,7,1073741823,0>
-; CHECK-NEXT: vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm0
+; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,9,0,18446744073709551615,3,7,18446744073709551615,0]
+; CHECK-NEXT: vpsrlvq {{.*}}(%rip), %zmm0, %zmm0
+; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,4,4,4,4,4,4,18446744073709551615]
+; CHECK-NEXT: vpsrlvq {{.*}}(%rip), %zmm1, %zmm1
+; CHECK-NEXT: vpaddq %zmm1, %zmm0, %zmm0
 ; CHECK-NEXT: retq
 %res0 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> , <8 x i64> )
 %res1 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> , <8 x i64> )
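The avx512bw/avx512bwvl tests below exercise the 16-bit element forms (VPSLLVW/VPSRLVW), which the earlier AVX2 instruction set never had; the same out-of-range rule applies per word lane, with 16 as the cutoff. A sketch under the same assumptions as the earlier models (illustration, not patch code):

#include <cstdint>

// VPSLLVW/VPSRLVW per-lane rule: a count >= 16 zeroes the 16-bit lane.
uint16_t vpsllvw_lane(uint16_t v, uint16_t c) {
  return c >= 16 ? uint16_t(0) : uint16_t(v << c);
}
uint16_t vpsrlvw_lane(uint16_t v, uint16_t c) {
  return c >= 16 ? uint16_t(0) : uint16_t(v >> c);
}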
Index: test/CodeGen/X86/avx512bw-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512bw-intrinsics.ll
+++ test/CodeGen/X86/avx512bw-intrinsics.ll
@@ -1158,15 +1158,19 @@
 define <32 x i16> @test_x86_avx512_psrlv_w_512_const() optsize {
 ; X86-LABEL: test_x86_avx512_psrlv_w_512_const:
 ; X86: # %bb.0:
-; X86-NEXT: vpbroadcastw {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; X86-NEXT: # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A]
+; X86-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X86-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
 ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_x86_avx512_psrlv_w_512_const:
 ; X64: # %bb.0:
-; X64-NEXT: vpbroadcastw {{.*#+}} zmm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; X64-NEXT: # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A]
+; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X64-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT: vpsrlvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A]
 ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-NEXT: retq # encoding: [0xc3]
 %res1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> , <32 x i16> )
@@ -1377,15 +1381,19 @@
 define <32 x i16> @test_x86_avx512_psllv_w_512_const() optsize {
 ; X86-LABEL: test_x86_avx512_psllv_w_512_const:
 ; X86: # %bb.0:
-; X86-NEXT: vpbroadcastw {{.*#+}} zmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; X86-NEXT: # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A]
+; X86-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X86-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vpsllvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
 ; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_x86_avx512_psllv_w_512_const:
 ; X64: # %bb.0:
-; X64-NEXT: vpbroadcastw {{.*#+}} zmm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; X64-NEXT: # encoding: [0x62,0xf2,0x7d,0x48,0x79,0x05,A,A,A,A]
+; X64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X64-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT: vpsllvw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A]
 ; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-NEXT: retq # encoding: [0xc3]
 %res1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> , <32 x i16> )
Index: test/CodeGen/X86/avx512bwvl-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -2021,16 +2021,20 @@
 define <8 x i16> @test_int_x86_avx512_psrlv_w_128_const() optsize {
 ; X86-LABEL: test_int_x86_avx512_psrlv_w_128_const:
 ; X86: # %bb.0:
-; X86-NEXT: vpbroadcastw {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,2,2,2,2,2,2,2]
-; X86-NEXT: # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A]
-; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
+; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_psrlv_w_128_const:
 ; X64: # %bb.0:
-; X64-NEXT: vpbroadcastw {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,2,2,2,2,2,2,2]
-; X64-NEXT: # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A]
-; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
+; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT: vpsrlvw {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> , <8 x i16> )
 ret <8 x i16> %res
@@ -2041,16 +2045,20 @@
 define <16 x i16> @test_int_x86_avx512_psrlv_w_256_const() optsize {
 ; X86-LABEL: test_int_x86_avx512_psrlv_w_256_const:
 ; X86: # %bb.0:
-; X86-NEXT: vpbroadcastw {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; X86-NEXT: # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A]
-; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_psrlv_w_256_const:
 ; X64: # %bb.0:
-; X64-NEXT: vpbroadcastw {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
-; X64-NEXT: # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A]
-; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT: vpsrlvw {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> , <16 x i16> )
 ret <16 x i16> %res
@@ -2195,16 +2203,20 @@
 define <8 x i16> @test_int_x86_avx512_psllv_w_128_const() optsize {
 ; X86-LABEL: test_int_x86_avx512_psllv_w_128_const:
 ; X86: # %bb.0:
-; X86-NEXT: vpbroadcastw {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [8,8,8,8,8,8,8,8]
-; X86-NEXT: # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A]
-; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
+; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vpsllvw {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_psllv_w_128_const:
 ; X64: # %bb.0:
-; X64-NEXT: vpbroadcastw {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [8,8,8,8,8,8,8,8]
-; X64-NEXT: # encoding: [0xc4,0xe2,0x79,0x79,0x05,A,A,A,A]
-; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT: vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
+; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT: vpsllvw {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> , <8 x i16> )
 ret <8 x i16> %res
@@ -2216,16 +2228,20 @@
 define <16 x i16> @test_int_x86_avx512_psllv_w_256_const() optsize {
 ; X86-LABEL: test_int_x86_avx512_psllv_w_256_const:
 ; X86: # %bb.0:
-; X86-NEXT: vpbroadcastw {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; X86-NEXT: # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A]
-; X86-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vpsllvw {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
 ; X86-NEXT: retl # encoding: [0xc3]
 ;
 ; X64-LABEL: test_int_x86_avx512_psllv_w_256_const:
 ; X64: # %bb.0:
-; X64-NEXT: vpbroadcastw {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; X64-NEXT: # encoding: [0xc4,0xe2,0x7d,0x79,0x05,A,A,A,A]
-; X64-NEXT: # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT: vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
+; X64-NEXT: vpsllvw {{.*}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
 ; X64-NEXT: retq # encoding: [0xc3]
 %res = call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> , <16 x i16> )
 ret <16 x i16> %res