Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -304,10 +304,8 @@ // Vector shift elements VSHL, VSRL, VSRA, - // Vector variable shift right arithmetic. - // Unlike ISD::SRA, in case shift count greater then element size - // use sign bit to fill destination data element. - VSRAV, + // Vector variable shift + VSHLV, VSRLV, VSRAV, // Vector shift elements by immediate VSHLI, VSRLI, VSRAI, Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -27101,6 +27101,8 @@ case X86ISD::VSHLI: return "X86ISD::VSHLI"; case X86ISD::VSRLI: return "X86ISD::VSRLI"; case X86ISD::VSRAI: return "X86ISD::VSRAI"; + case X86ISD::VSHLV: return "X86ISD::VSHLV"; + case X86ISD::VSRLV: return "X86ISD::VSRLV"; case X86ISD::VSRAV: return "X86ISD::VSRAV"; case X86ISD::VROTLI: return "X86ISD::VROTLI"; case X86ISD::VROTRI: return "X86ISD::VROTRI"; Index: lib/Target/X86/X86InstrAVX512.td =================================================================== --- lib/Target/X86/X86InstrAVX512.td +++ lib/Target/X86/X86InstrAVX512.td @@ -6432,52 +6432,53 @@ defm : avx512_var_shift_lowering; // Special handling for VPSRAV intrinsics. -multiclass avx512_var_shift_int_lowering<string InstrStr, X86VectorVTInfo _, list<Predicate> p> { +multiclass avx512_var_shift_int_lowering<string InstrStr, SDNode OpNode, X86VectorVTInfo _, list<Predicate> p> { let Predicates = p in { - def : Pat<(_.VT (X86vsrav _.RC:$src1, _.RC:$src2)), + def : Pat<(_.VT (OpNode _.RC:$src1, _.RC:$src2)), (!cast<Instruction>(InstrStr#_.ZSuffix#rr) _.RC:$src1, _.RC:$src2)>; - def : Pat<(_.VT (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2))), + def : Pat<(_.VT (OpNode _.RC:$src1, (_.LdFrag addr:$src2))), (!cast<Instruction>(InstrStr#_.ZSuffix##rm) _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, _.RC:$src2), _.RC:$src0)), + (OpNode _.RC:$src1, _.RC:$src2), _.RC:$src0)), (!cast<Instruction>(InstrStr#_.ZSuffix#rrk) _.RC:$src0, _.KRC:$mask, _.RC:$src1, _.RC:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)), + (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), _.RC:$src0)), (!cast<Instruction>(InstrStr#_.ZSuffix##rmk) _.RC:$src0, _.KRC:$mask, _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)), + (OpNode _.RC:$src1, _.RC:$src2), _.ImmAllZerosV)), (!cast<Instruction>(InstrStr#_.ZSuffix#rrkz) _.KRC:$mask, _.RC:$src1, _.RC:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, (_.LdFrag addr:$src2)), + (OpNode _.RC:$src1, (_.LdFrag addr:$src2)), _.ImmAllZerosV)), (!cast<Instruction>(InstrStr#_.ZSuffix##rmkz) _.KRC:$mask, _.RC:$src1, addr:$src2)>; } } -multiclass avx512_var_shift_int_lowering_mb<string InstrStr, X86VectorVTInfo _, list<Predicate> p> : - avx512_var_shift_int_lowering<InstrStr, _, p> { +multiclass avx512_var_shift_int_lowering_mb<string InstrStr, SDNode OpNode, X86VectorVTInfo _, list<Predicate> p> : + avx512_var_shift_int_lowering<InstrStr, OpNode, _, p> { let Predicates = p in { - def : Pat<(_.VT (X86vsrav _.RC:$src1, + def : Pat<(_.VT (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2)))), (!cast<Instruction>(InstrStr#_.ZSuffix##rmb) _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, + (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2))), _.RC:$src0)), (!cast<Instruction>(InstrStr#_.ZSuffix##rmbk) _.RC:$src0, _.KRC:$mask, _.RC:$src1, addr:$src2)>; def : Pat<(_.VT (vselect _.KRCWM:$mask, - (X86vsrav _.RC:$src1, + (OpNode _.RC:$src1, (X86VBroadcast (_.ScalarLdFrag addr:$src2))), _.ImmAllZerosV)), (!cast<Instruction>(InstrStr#_.ZSuffix##rmbkz)
_.KRC:$mask, @@ -6485,15 +6486,35 @@ } } -defm : avx512_var_shift_int_lowering<"VPSRAVW", v8i16x_info, [HasVLX, HasBWI]>; -defm : avx512_var_shift_int_lowering<"VPSRAVW", v16i16x_info, [HasVLX, HasBWI]>; -defm : avx512_var_shift_int_lowering<"VPSRAVW", v32i16_info, [HasBWI]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v4i32x_info, [HasVLX]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v8i32x_info, [HasVLX]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", v16i32_info, [HasAVX512]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v2i64x_info, [HasVLX]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v4i64x_info, [HasVLX]>; -defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", v8i64_info, [HasAVX512]>; +defm : avx512_var_shift_int_lowering<"VPSRAVW", X86vsrav, v8i16x_info, [HasVLX, HasBWI]>; +defm : avx512_var_shift_int_lowering<"VPSRAVW", X86vsrav, v16i16x_info, [HasVLX, HasBWI]>; +defm : avx512_var_shift_int_lowering<"VPSRAVW", X86vsrav, v32i16_info, [HasBWI]>; +defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", X86vsrav, v4i32x_info, [HasVLX]>; +defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", X86vsrav, v8i32x_info, [HasVLX]>; +defm : avx512_var_shift_int_lowering_mb<"VPSRAVD", X86vsrav, v16i32_info, [HasAVX512]>; +defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", X86vsrav, v2i64x_info, [HasVLX]>; +defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", X86vsrav, v4i64x_info, [HasVLX]>; +defm : avx512_var_shift_int_lowering_mb<"VPSRAVQ", X86vsrav, v8i64_info, [HasAVX512]>; + +defm : avx512_var_shift_int_lowering<"VPSRLVW", X86vsrlv, v8i16x_info, [HasVLX, HasBWI]>; +defm : avx512_var_shift_int_lowering<"VPSRLVW", X86vsrlv, v16i16x_info, [HasVLX, HasBWI]>; +defm : avx512_var_shift_int_lowering<"VPSRLVW", X86vsrlv, v32i16_info, [HasBWI]>; +defm : avx512_var_shift_int_lowering_mb<"VPSRLVD", X86vsrlv, v4i32x_info, [HasVLX]>; +defm : avx512_var_shift_int_lowering_mb<"VPSRLVD", X86vsrlv, v8i32x_info, [HasVLX]>; +defm : avx512_var_shift_int_lowering_mb<"VPSRLVD", X86vsrlv, v16i32_info, [HasAVX512]>; +defm : avx512_var_shift_int_lowering_mb<"VPSRLVQ", X86vsrlv, v2i64x_info, [HasVLX]>; +defm : avx512_var_shift_int_lowering_mb<"VPSRLVQ", X86vsrlv, v4i64x_info, [HasVLX]>; +defm : avx512_var_shift_int_lowering_mb<"VPSRLVQ", X86vsrlv, v8i64_info, [HasAVX512]>; + +defm : avx512_var_shift_int_lowering<"VPSLLVW", X86vshlv, v8i16x_info, [HasVLX, HasBWI]>; +defm : avx512_var_shift_int_lowering<"VPSLLVW", X86vshlv, v16i16x_info, [HasVLX, HasBWI]>; +defm : avx512_var_shift_int_lowering<"VPSLLVW", X86vshlv, v32i16_info, [HasBWI]>; +defm : avx512_var_shift_int_lowering_mb<"VPSLLVD", X86vshlv, v4i32x_info, [HasVLX]>; +defm : avx512_var_shift_int_lowering_mb<"VPSLLVD", X86vshlv, v8i32x_info, [HasVLX]>; +defm : avx512_var_shift_int_lowering_mb<"VPSLLVD", X86vshlv, v16i32_info, [HasAVX512]>; +defm : avx512_var_shift_int_lowering_mb<"VPSLLVQ", X86vshlv, v2i64x_info, [HasVLX]>; +defm : avx512_var_shift_int_lowering_mb<"VPSLLVQ", X86vshlv, v4i64x_info, [HasVLX]>; +defm : avx512_var_shift_int_lowering_mb<"VPSLLVQ", X86vshlv, v8i64_info, [HasAVX512]>; // Use 512bit VPROL/VPROLI version to implement v2i64/v4i64 + v4i32/v8i32 in case NoVLX. 
let Predicates = [HasAVX512, NoVLX] in { Index: lib/Target/X86/X86InstrFragmentsSIMD.td =================================================================== --- lib/Target/X86/X86InstrFragmentsSIMD.td +++ lib/Target/X86/X86InstrFragmentsSIMD.td @@ -181,6 +181,8 @@ def X86vshiftvariable : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0,1>, SDTCisSameAs<0,2>, SDTCisInt<0>]>; +def X86vshlv : SDNode<"X86ISD::VSHLV", X86vshiftvariable>; +def X86vsrlv : SDNode<"X86ISD::VSRLV", X86vshiftvariable>; def X86vsrav : SDNode<"X86ISD::VSRAV", X86vshiftvariable>; def X86vshli : SDNode<"X86ISD::VSHLI", X86vshiftimm>; Index: lib/Target/X86/X86InstrSSE.td =================================================================== --- lib/Target/X86/X86InstrSSE.td +++ lib/Target/X86/X86InstrSSE.td @@ -8356,6 +8356,42 @@ defm VPSRLVQ : avx2_var_shift<0x45, "vpsrlvq", srl, v2i64, v4i64>, VEX_W; defm VPSRAVD : avx2_var_shift<0x46, "vpsravd", sra, v4i32, v8i32>; + def : Pat<(v4i32 (X86vshlv VR128:$src1, VR128:$src2)), + (VPSLLVDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86vshlv VR128:$src1, (load addr:$src2))), + (VPSLLVDrm VR128:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86vshlv VR256:$src1, VR256:$src2)), + (VPSLLVDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86vshlv VR256:$src1, (load addr:$src2))), + (VPSLLVDYrm VR256:$src1, addr:$src2)>; + + def : Pat<(v2i64 (X86vshlv VR128:$src1, VR128:$src2)), + (VPSLLVQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86vshlv VR128:$src1, (load addr:$src2))), + (VPSLLVQrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86vshlv VR256:$src1, VR256:$src2)), + (VPSLLVQYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86vshlv VR256:$src1, (load addr:$src2))), + (VPSLLVQYrm VR256:$src1, addr:$src2)>; + + def : Pat<(v4i32 (X86vsrlv VR128:$src1, VR128:$src2)), + (VPSRLVDrr VR128:$src1, VR128:$src2)>; + def : Pat<(v4i32 (X86vsrlv VR128:$src1, (load addr:$src2))), + (VPSRLVDrm VR128:$src1, addr:$src2)>; + def : Pat<(v8i32 (X86vsrlv VR256:$src1, VR256:$src2)), + (VPSRLVDYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v8i32 (X86vsrlv VR256:$src1, (load addr:$src2))), + (VPSRLVDYrm VR256:$src1, addr:$src2)>; + + def : Pat<(v2i64 (X86vsrlv VR128:$src1, VR128:$src2)), + (VPSRLVQrr VR128:$src1, VR128:$src2)>; + def : Pat<(v2i64 (X86vsrlv VR128:$src1, (load addr:$src2))), + (VPSRLVQrm VR128:$src1, addr:$src2)>; + def : Pat<(v4i64 (X86vsrlv VR256:$src1, VR256:$src2)), + (VPSRLVQYrr VR256:$src1, VR256:$src2)>; + def : Pat<(v4i64 (X86vsrlv VR256:$src1, (load addr:$src2))), + (VPSRLVQYrm VR256:$src1, addr:$src2)>; + def : Pat<(v4i32 (X86vsrav VR128:$src1, VR128:$src2)), (VPSRAVDrr VR128:$src1, VR128:$src2)>; def : Pat<(v4i32 (X86vsrav VR128:$src1, (load addr:$src2))), Index: lib/Target/X86/X86IntrinsicsInfo.h =================================================================== --- lib/Target/X86/X86IntrinsicsInfo.h +++ lib/Target/X86/X86IntrinsicsInfo.h @@ -339,10 +339,10 @@ X86_INTRINSIC_DATA(avx2_pslli_d, VSHIFT, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx2_pslli_q, VSHIFT, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx2_pslli_w, VSHIFT, X86ISD::VSHLI, 0), - X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx2_psllv_d, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx2_psllv_d_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + 
X86_INTRINSIC_DATA(avx2_psllv_q, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx2_psllv_q_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0), X86_INTRINSIC_DATA(avx2_psra_d, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx2_psra_w, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx2_psrai_d, VSHIFT, X86ISD::VSRAI, 0), @@ -355,10 +355,10 @@ X86_INTRINSIC_DATA(avx2_psrli_d, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx2_psrli_q, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx2_psrli_w, VSHIFT, X86ISD::VSRLI, 0), - X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx2_psrlv_d, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx2_psrlv_d_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx2_psrlv_q, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx2_psrlv_q_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0), X86_INTRINSIC_DATA(avx512_add_pd_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_add_ps_512, INTR_TYPE_2OP, ISD::FADD, X86ISD::FADD_RND), X86_INTRINSIC_DATA(avx512_cmp_pd_128, CMP_MASK_CC, X86ISD::CMPM, 0), @@ -941,11 +941,11 @@ X86_INTRINSIC_DATA(avx512_pslli_d_512, VSHIFT, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_pslli_q_512, VSHIFT, X86ISD::VSHLI, 0), X86_INTRINSIC_DATA(avx512_pslli_w_512, VSHIFT, X86ISD::VSHLI, 0), - X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, ISD::SHL, 0), - X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, ISD::SHL, 0), + X86_INTRINSIC_DATA(avx512_psllv_d_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx512_psllv_q_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx512_psllv_w_128, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx512_psllv_w_256, INTR_TYPE_2OP, X86ISD::VSHLV, 0), + X86_INTRINSIC_DATA(avx512_psllv_w_512, INTR_TYPE_2OP, X86ISD::VSHLV, 0), X86_INTRINSIC_DATA(avx512_psra_d_512, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx512_psra_q_128, INTR_TYPE_2OP, X86ISD::VSRA, 0), X86_INTRINSIC_DATA(avx512_psra_q_256, INTR_TYPE_2OP, X86ISD::VSRA, 0), @@ -969,11 +969,11 @@ X86_INTRINSIC_DATA(avx512_psrli_d_512, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_psrli_q_512, VSHIFT, X86ISD::VSRLI, 0), X86_INTRINSIC_DATA(avx512_psrli_w_512, VSHIFT, X86ISD::VSRLI, 0), - X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, ISD::SRL, 0), - X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, ISD::SRL, 0), + X86_INTRINSIC_DATA(avx512_psrlv_d_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx512_psrlv_q_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx512_psrlv_w_128, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx512_psrlv_w_256, INTR_TYPE_2OP, X86ISD::VSRLV, 0), + X86_INTRINSIC_DATA(avx512_psrlv_w_512, INTR_TYPE_2OP, X86ISD::VSRLV, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_128, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_256, INTR_TYPE_4OP, 
X86ISD::VPTERNLOG, 0), X86_INTRINSIC_DATA(avx512_pternlog_d_512, INTR_TYPE_4OP, X86ISD::VPTERNLOG, 0), Index: test/CodeGen/X86/avx2-intrinsics-x86.ll =================================================================== --- test/CodeGen/X86/avx2-intrinsics-x86.ll +++ test/CodeGen/X86/avx2-intrinsics-x86.ll @@ -1179,6 +1179,66 @@ %res = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } + +define <4 x i32> @test_x86_avx2_psllv_d_const(<4 x i32> %a0, <4 x i32> %a1) { +; X86-AVX2-LABEL: test_x86_avx2_psllv_d_const: +; X86-AVX2: ## %bb.0: +; X86-AVX2-NEXT: vmovdqa {{LCPI.*}}, %xmm2 ## xmm2 = [2,9,0,4294967295] +; X86-AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x15,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: vpsllvd {{LCPI.*}}, %xmm2, %xmm2 ## encoding: [0xc4,0xe2,0x69,0x47,0x15,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: vmovdqa {{LCPI.*}}, %xmm3 ## xmm3 = [1,1,1,4294967295] +; X86-AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x1d,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: vpsllvd %xmm3, %xmm3, %xmm3 ## encoding: [0xc4,0xe2,0x61,0x47,0xdb] +; X86-AVX2: retl ## encoding: [0xc3] +; +; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_const: +; X86-AVX512VL: ## %bb.0: +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %xmm2 ## EVEX TO VEX Compression xmm2 = [2,9,0,4294967295] +; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x15,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsllvd {{LCPI.*}}, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0x47,0x15,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %xmm3 ## EVEX TO VEX Compression xmm3 = [1,1,1,4294967295] +; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x1d,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsllvd %xmm3, %xmm3, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x61,0x47,0xdb] +; X86-AVX512VL: retl ## encoding: [0xc3] +; +; X64-AVX2-LABEL: test_x86_avx2_psllv_d_const: +; X64-AVX2: ## %bb.0: +; X64-AVX2-NEXT: vmovdqa {{LCPI.*}}(%rip), %xmm2 ## xmm2 = [2,9,0,4294967295] +; X64-AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x15,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vpsllvd {{LCPI.*}}(%rip), %xmm2, %xmm2 ## encoding: [0xc4,0xe2,0x69,0x47,0x15,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vmovdqa {{LCPI.*}}(%rip), %xmm3 ## xmm3 = [1,1,1,4294967295] +; X64-AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x1d,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vpsllvd %xmm3, %xmm3, %xmm3 ## encoding: [0xc4,0xe2,0x61,0x47,0xdb] +; X64-AVX2: retq ## encoding: [0xc3] +; +; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_const: +; X64-AVX512VL: ## %bb.0: +; X64-AVX512VL-NEXT: vmovdqa {{LCPI.*}}(%rip), %xmm2 ## EVEX TO VEX Compression xmm2 = [2,9,0,4294967295] +; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x15,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsllvd {{LCPI.*}}(%rip), %xmm2, %xmm2 ## EVEX TO VEX Compression encoding:
[0xc4,0xe2,0x69,0x47,0x15,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vmovdqa {{LCPI.*}}(%rip), %xmm3 ## EVEX TO VEX Compression xmm3 = [1,1,1,4294967295] +; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x1d,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsllvd %xmm3, %xmm3, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x61,0x47,0xdb] +; X64-AVX512VL: retq ## encoding: [0xc3] + %res0 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> ) ; <<4 x i32>> [#uses=1] + %res2 = add <4 x i32> %a0, %res0 + %res1 = call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> , <4 x i32> ) + %res3 = add <4 x i32> %a1, %res1 + %res4 = add <4 x i32> %res2, %res3 + ret <4 x i32> %res4 +} declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) nounwind readnone @@ -1195,6 +1255,70 @@ %res = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res } + +define <8 x i32> @test_x86_avx2_psllv_d_256_const(<8 x i32> %a0, <8 x i32> %a1) { +; X86-AVX2-LABEL: test_x86_avx2_psllv_d_256_const: +; X86-AVX2: ## %bb.0: +; X86-AVX2-NEXT: vmovdqa {{LCPI.*}}, %ymm2 ## ymm2 = [2,9,0,4294967295,3,7,4294967295,0] +; X86-AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x15,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: vpsllvd {{LCPI.*}}, %ymm2, %ymm2 ## encoding: [0xc4,0xe2,0x6d,0x47,0x15,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: vmovdqa {{LCPI.*}}, %ymm3 ## ymm3 = [4,4,4,4,4,4,4,4294967295] +; X86-AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x1d,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: vpsllvd {{LCPI.*}}, %ymm3, %ymm3 ## encoding: [0xc4,0xe2,0x65,0x47,0x1d,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2: retl ## encoding: [0xc3] + +; X86-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const: +; X86-AVX512VL: ## %bb.0: +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %ymm2 ## EVEX TO VEX Compression ymm2 = [2,9,0,4294967295,3,7,4294967295,0] +; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x15,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsllvd {{LCPI.*}}, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x6d,0x47,0x15,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %ymm3 ## EVEX TO VEX Compression ymm3 = [4,4,4,4,4,4,4,4294967295] +; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x1d,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsllvd {{LCPI.*}}, %ymm3, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x65,0x47,0x1d,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL: retl ## encoding: [0xc3] + +; X64-AVX2-LABEL: test_x86_avx2_psllv_d_256_const: +; X64-AVX2: ## %bb.0: +; X64-AVX2-NEXT: vmovdqa {{LCPI.*}}(%rip), %ymm2 ## ymm2 = [2,9,0,4294967295,3,7,4294967295,0] +; X64-AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x15,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vpsllvd {{LCPI.*}}(%rip), %ymm2, %ymm2 ## encoding: [0xc4,0xe2,0x6d,0x47,0x15,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vmovdqa {{LCPI.*}}(%rip), %ymm3 ## ymm3 = [4,4,4,4,4,4,4,4294967295] +; X64-AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x1d,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vpsllvd {{LCPI.*}}(%rip), %ymm3, %ymm3 ## encoding: [0xc4,0xe2,0x65,0x47,0x1d,A,A,A,A] +; X64-AVX2: retq ## encoding: [0xc3] + +; X64-AVX512VL-LABEL: test_x86_avx2_psllv_d_256_const: +; X64-AVX512VL: ## %bb.0: +; X64-AVX512VL-NEXT: vmovdqa {{LCPI.*}}(%rip), %ymm2 ## EVEX TO VEX Compression ymm2 = [2,9,0,4294967295,3,7,4294967295,0] +; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x15,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsllvd {{LCPI.*}}(%rip), %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x6d,0x47,0x15,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vmovdqa {{LCPI.*}}(%rip), %ymm3 ## EVEX TO VEX Compression ymm3 = [4,4,4,4,4,4,4,4294967295] +; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x1d,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsllvd {{LCPI.*}}(%rip), %ymm3, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x65,0x47,0x1d,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL: retq ## encoding: [0xc3] + + %res0 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> ) ; <<8 x i32>> [#uses=1] + %res2 = add <8 x i32> %a0, %res0 + %res1 = call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> , <8 x i32> ) ; <<8 x i32>> [#uses=1] + %res3 = add <8 x i32> %a1, %res1 + %res4 = add <8 x i32> %res2, %res3 + ret <8 x i32> %res4 +} declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) nounwind readnone @@ -1211,6 +1335,45 @@ %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res } +define <2 x i64> @test_x86_avx2_psllv_q_const(<2 x i64> %a0, <2 x i64> %a1) { +; X86-AVX2-LABEL: test_x86_avx2_psllv_q_const: +; X86-AVX2: ## %bb.0: +; X86-AVX2-NEXT: vmovdqa {{LCPI.*}}, %xmm0 ## xmm0 = [4,0,4294967295,4294967295] +; X86-AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: vpsllvq {{LCPI.*}}, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: retl ## encoding: [0xc3] + +; X86-AVX512VL-LABEL: test_x86_avx2_psllv_q_const: +; X86-AVX512VL: ## %bb.0: +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %xmm0 ## EVEX TO VEX Compression xmm0 = [4,0,4294967295,4294967295] +; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsllvq {{LCPI.*}}, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: retl ## encoding: [0xc3] + +; X64-AVX2-LABEL: test_x86_avx2_psllv_q_const: +; X64-AVX2: ## %bb.0: +; X64-AVX2-NEXT: vmovdqa {{LCPI.*}}(%rip), %xmm0 ## xmm0 = [4,18446744073709551615] +; X64-AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vpsllvq {{LCPI.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: retq ## encoding: [0xc3] + +; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_const: +; X64-AVX512VL: ## %bb.0: +; X64-AVX512VL-NEXT: vmovdqa {{LCPI.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [4,18446744073709551615] +; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsllvq {{LCPI.*}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: retq ## encoding: [0xc3] + %res = call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> , <2 x i64> ) + ret <2 x i64> %res +} declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) nounwind readnone @@ -1227,6 +1390,46 @@ %res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1] ret <4 x i64> %res } + +define <4 x i64> @test_x86_avx2_psllv_q_256_const(<4 x i64> %a0, <4 x i64> %a1) { +; X86-AVX2-LABEL: test_x86_avx2_psllv_q_256_const: +; X86-AVX2: ## %bb.0: +; X86-AVX2-NEXT: vmovdqa {{LCPI.*}}, %ymm0 ## ymm0 = [4,0,4,0,4,0,4294967295,4294967295] +; X86-AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: vpsllvq {{LCPI.*}}, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: retl ## encoding: [0xc3] + +; X86-AVX512VL-LABEL: test_x86_avx2_psllv_q_256_const: +; X86-AVX512VL: ## %bb.0: +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %ymm0 ## EVEX TO VEX Compression ymm0 = [4,0,4,0,4,0,4294967295,4294967295] +; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsllvq {{LCPI.*}}, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: retl ## encoding: [0xc3] + +; X64-AVX2-LABEL: test_x86_avx2_psllv_q_256_const: +; X64-AVX2: ## %bb.0: +; X64-AVX2-NEXT: vmovdqa {{LCPI.*}}(%rip), %ymm0 ## ymm0 = [4,4,4,18446744073709551615] +; X64-AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vpsllvq {{LCPI.*}}(%rip), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: retq ## encoding: [0xc3] + +; X64-AVX512VL-LABEL: test_x86_avx2_psllv_q_256_const: +; X64-AVX512VL: ## %bb.0: +; X64-AVX512VL-NEXT: vmovdqa {{LCPI.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [4,4,4,18446744073709551615] +; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsllvq {{LCPI.*}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: retq ## encoding: [0xc3] + 
%res = call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> , <4 x i64> ) + ret <4 x i64> %res +} declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) nounwind readnone @@ -1243,6 +1446,71 @@ %res = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %a0, <4 x i32> %a1) ; <<4 x i32>> [#uses=1] ret <4 x i32> %res } + +define <4 x i32> @test_x86_avx2_psrlv_d_const(<4 x i32> %a0, <4 x i32> %a1) { +; X86-AVX-LABEL: test_x86_avx2_psrlv_d_const: +; X86-AVX: ## %bb.0: +; X86-AVX-NEXT: vmovdqa {{LCPI.*}}, %xmm2 ## xmm2 = [2,9,0,4294967295] +; X86-AVX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x15,A,A,A,A] +; X86-AVX-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpsrlvd {{LCPI.*}}, %xmm2, %xmm2 ## encoding: [0xc4,0xe2,0x69,0x45,0x15,A,A,A,A] +; X86-AVX-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vmovdqa {{LCPI.*}}, %xmm3 ## xmm3 = [4,4,4,4294967295] +; X86-AVX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x1d,A,A,A,A] +; X86-AVX-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpsrlvd {{LCPI.*}}, %xmm3, %xmm3 ## encoding: [0xc4,0xe2,0x61,0x45,0x1d,A,A,A,A] +; X86-AVX-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX: retl ## encoding: [0xc3] + +; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const: +; X86-AVX512VL: ## %bb.0: +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %xmm2 ## EVEX TO VEX Compression xmm2 = [2,9,0,4294967295] +; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x15,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsrlvd {{LCPI.*}}, %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0x45,0x15,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %xmm3 ## EVEX TO VEX Compression xmm3 = [4,4,4,4294967295] +; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x1d,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsrlvd {{LCPI.*}}, %xmm3, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x61,0x45,0x1d,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL: retl ## encoding: [0xc3] + +; X64-AVX2-LABEL: test_x86_avx2_psrlv_d_const: +; X64-AVX2: ## %bb.0: +; X64-AVX2-NEXT: vmovdqa {{LCPI.*}}(%rip), %xmm2 ## xmm2 = [2,9,0,4294967295] +; X64-AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x15,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vpsrlvd {{LCPI.*}}(%rip), %xmm2, %xmm2 ## encoding: [0xc4,0xe2,0x69,0x45,0x15,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vmovdqa {{LCPI.*}}(%rip), %xmm3 ## xmm3 = [4,4,4,4294967295] +; X64-AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x1d,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vpsrlvd {{LCPI.*}}(%rip), %xmm3, %xmm3 ## encoding: [0xc4,0xe2,0x61,0x45,0x1d,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2: retq ## encoding: [0xc3] + +; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_const: +; X64-AVX512VL: ## %bb.0: +; X64-AVX512VL-NEXT: vmovdqa {{LCPI.*}}(%rip), %xmm2 ## EVEX TO VEX Compression xmm2 = [2,9,0,4294967295] +; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x15,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 4, 
value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsrlvd {{LCPI.*}}(%rip), %xmm2, %xmm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x69,0x45,0x15,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vmovdqa {{LCPI.*}}(%rip), %xmm3 ## EVEX TO VEX Compression xmm3 = [4,4,4,4294967295] +; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x1d,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsrlvd {{LCPI.*}}(%rip), %xmm3, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x61,0x45,0x1d,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL: retq ## encoding: [0xc3] + + %res0 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> ) ; <<4 x i32>> [#uses=1] + %res2 = add <4 x i32> %a0, %res0 + %res1 = call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> , <4 x i32> ) ; <<4 x i32>> [#uses=1] + %res3 = add <4 x i32> %a1, %res1 + %res4 = add <4 x i32> %res2, %res3 + ret <4 x i32> %res4 +} declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) nounwind readnone @@ -1259,6 +1527,71 @@ %res = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %a0, <8 x i32> %a1) ; <<8 x i32>> [#uses=1] ret <8 x i32> %res } + +define <8 x i32> @test_x86_avx2_psrlv_d_256_const(<8 x i32> %a0, <8 x i32> %a1) { +; X86-AVX2-LABEL: test_x86_avx2_psrlv_d_256_const: +; X86-AVX2: ## %bb.0: +; X86-AVX2-NEXT: vmovdqa {{LCPI.*}}, %ymm2 ## ymm2 = [2,9,0,4294967295,3,7,4294967295,0] +; X86-AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x15,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: vpsrlvd {{LCPI.*}}, %ymm2, %ymm2 ## encoding: [0xc4,0xe2,0x6d,0x45,0x15,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: vmovdqa {{LCPI.*}}, %ymm3 ## ymm3 = [4,4,4,4,4,4,4,4294967295] +; X86-AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x1d,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: vpsrlvd {{LCPI.*}}, %ymm3, %ymm3 ## encoding: [0xc4,0xe2,0x65,0x45,0x1d,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2: retl ## encoding: [0xc3] + +; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const: +; X86-AVX512VL: ## %bb.0: +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %ymm2 ## EVEX TO VEX Compression ymm2 = [2,9,0,4294967295,3,7,4294967295,0] +; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x15,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsrlvd {{LCPI.*}}, %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x6d,0x45,0x15,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %ymm3 ## EVEX TO VEX Compression ymm3 = [4,4,4,4,4,4,4,4294967295] +; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x1d,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsrlvd {{LCPI.*}}, %ymm3, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x65,0x45,0x1d,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL: retl ## encoding: [0xc3] + +; X64-AVX2-LABEL: test_x86_avx2_psrlv_d_256_const: +; X64-AVX2: ## %bb.0: +; X64-AVX2-NEXT: vmovdqa {{LCPI.*}}(%rip), %ymm2 ## ymm2 = [2,9,0,4294967295,3,7,4294967295,0] +; X64-AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x15,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vpsrlvd {{LCPI.*}}(%rip), %ymm2, %ymm2 ## encoding: [0xc4,0xe2,0x6d,0x45,0x15,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vmovdqa {{LCPI.*}}(%rip), %ymm3 ## ymm3 = [4,4,4,4,4,4,4,4294967295] +; X64-AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x1d,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vpsrlvd {{LCPI.*}}(%rip), %ymm3, %ymm3 ## encoding: [0xc4,0xe2,0x65,0x45,0x1d,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2: retq ## encoding: [0xc3] + +; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_d_256_const: +; X64-AVX512VL: ## %bb.0: +; X64-AVX512VL-NEXT: vmovdqa {{LCPI.*}}(%rip), %ymm2 ## EVEX TO VEX Compression ymm2 = [2,9,0,4294967295,3,7,4294967295,0] +; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x15,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsrlvd {{LCPI.*}}(%rip), %ymm2, %ymm2 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x6d,0x45,0x15,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vmovdqa {{LCPI.*}}(%rip), %ymm3 ## EVEX TO VEX Compression ymm3 = [4,4,4,4,4,4,4,4294967295] +; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x1d,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsrlvd {{LCPI.*}}(%rip), %ymm3, %ymm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x65,0x45,0x1d,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL: retq ## encoding: [0xc3] + + %res0 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> ) ; <<8 x i32>> [#uses=1] + %res2 = add <8 x i32> %a0, %res0 + %res1 = call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> , <8 x i32> ) ; <<8 x i32>> [#uses=1] + %res3 = add <8 x i32> %a1, %res1 + %res4 = add <8 x i32> %res2, %res3 + ret <8 x i32> %res4 +} declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) nounwind readnone @@ -1275,6 +1608,46 @@ %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %a0, <2 x i64> %a1) ; <<2 x i64>> [#uses=1] ret <2 x i64> %res } + +define <2 x i64> @test_x86_avx2_psrlv_q_const(<2 x i64> %a0, <2 x i64> %a1) { +; X86-AVX2-LABEL: test_x86_avx2_psrlv_q_const: +; X86-AVX2: ## %bb.0: +; X86-AVX2-NEXT: vmovdqa {{LCPI.*}}, %xmm0 ## xmm0 = [4,0,4,0] +; X86-AVX2-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: vpsrlvq {{LCPI.*}}, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: retl ## encoding: [0xc3] + +; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_q_const: +; X86-AVX512VL: ## %bb.0: +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %xmm0 ## EVEX TO VEX Compression xmm0 = [4,0,4,0] +; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsrlvq {{LCPI.*}}, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: retl ## encoding: [0xc3] + +; X64-AVX2-LABEL: test_x86_avx2_psrlv_q_const: +; X64-AVX2: ## %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{LCPI.*}}(%rip), %xmm0 ## xmm0 = [4,4] +; X64-AVX2-NEXT: ## encoding: [0xc4,0xe2,0x79,0x59,0x05,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vpsrlvq {{LCPI.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: retq ## encoding: [0xc3] + +; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_const: +; X64-AVX512VL: ## %bb.0: +; X64-AVX512VL-NEXT: vpbroadcastq {{LCPI.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [4,4] +; X64-AVX512VL-NEXT: ## encoding: [0xc4,0xe2,0x79,0x59,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsrlvq {{LCPI.*}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: retq ## encoding: [0xc3] + %res = call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> , <2 x i64> ) + ret <2 x i64> %res +} declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) nounwind readnone @@ -1291,6 +1664,47 @@ %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %a0, <4 x i64> %a1) ; <<4 x i64>> [#uses=1] ret <4 x i64> %res } + + +define <4 x i64> @test_x86_avx2_psrlv_q_256_const(<4 x i64> %a0, <4 x i64> %a1) { +; X86-AVX2-LABEL: test_x86_avx2_psrlv_q_256_const: +; X86-AVX2: ## %bb.0: +; X86-AVX2-NEXT: vmovdqa {{LCPI.*}}, %ymm0 ## ymm0 = [4,0,4,0,4,0,4,0] +; X86-AVX2-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: vpsrlvq {{LCPI.*}}, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A] +; X86-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX2-NEXT: retl ## encoding: [0xc3] + +; X86-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256_const: +; X86-AVX512VL: ## %bb.0: +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %ymm0 ## EVEX TO VEX Compression ymm0 = [4,0,4,0,4,0,4,0] +; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsrlvq {{LCPI.*}}, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: retl ## encoding: [0xc3] + +; X64-AVX2-LABEL: test_x86_avx2_psrlv_q_256_const: +; X64-AVX2: ## %bb.0: +; X64-AVX2-NEXT: vpbroadcastq {{LCPI.*}}(%rip), %ymm0 ## ymm0 = [4,4,4,4] +; X64-AVX2-NEXT: ## encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: vpsrlvq {{LCPI.*}}(%rip), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A] +; X64-AVX2-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX2-NEXT: retq ## encoding: [0xc3] + +; X64-AVX512VL-LABEL: test_x86_avx2_psrlv_q_256_const: +; X64-AVX512VL: ## %bb.0: +; X64-AVX512VL-NEXT: vpbroadcastq {{LCPI.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [4,4,4,4] +; X64-AVX512VL-NEXT: ## encoding: [0xc4,0xe2,0x7d,0x59,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsrlvq {{LCPI.*}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: retq ## encoding: [0xc3] + %res = call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> , <4 x i64> ) + ret <4 x i64> %res +} declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) nounwind readnone @@ -1308,47 +1722,49 @@ ret <4 x i32> %res } + define <4 x i32> @test_x86_avx2_psrav_d_const(<4 x i32> %a0, <4 x i32> %a1) { ; X86-AVX-LABEL: test_x86_avx2_psrav_d_const: ; X86-AVX: ## %bb.0: -; X86-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23] +; X86-AVX-NEXT: vmovdqa {{LCPI.*}}, %xmm0 ## xmm0 = [2,9,4294967284,23] ; X86-AVX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI79_0, kind: FK_Data_4 -; X86-AVX-NEXT: vpsravd LCPI79_1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] -; X86-AVX-NEXT: ## fixup A - offset: 5, value: LCPI79_1, kind: FK_Data_4 +; X86-AVX-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpsravd {{LCPI.*}}, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] +; X86-AVX-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 ; X86-AVX-NEXT: retl ## encoding: [0xc3] ; ; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_const: ; X86-AVX512VL: ## %bb.0: -; X86-AVX512VL-NEXT: vmovdqa LCPI79_0, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23] +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23] ; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI79_0, kind: FK_Data_4 -; X86-AVX512VL-NEXT: vpsravd LCPI79_1, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] -; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI79_1, kind: FK_Data_4 +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsravd {{LCPI.*}}, %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 ; X86-AVX512VL-NEXT: retl ## encoding: [0xc3] ; ; X64-AVX-LABEL: test_x86_avx2_psrav_d_const: ; X64-AVX: ## %bb.0: -; X64-AVX-NEXT: vmovdqa {{.*#+}} xmm0 = [2,9,4294967284,23] +; X64-AVX-NEXT: vmovdqa {{LCPI.*}}(%rip), %xmm0 ## xmm0 = [2,9,4294967284,23] ; X64-AVX-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI79_0-4, kind: reloc_riprel_4byte -; X64-AVX-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] -; X64-AVX-NEXT: ## fixup A - offset: 5, value: LCPI79_1-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsravd {{LCPI.*}}(%rip), %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] +; X64-AVX-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte ; X64-AVX-NEXT: retq ## encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_const: ; X64-AVX512VL: ## %bb.0: -; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23] +; X64-AVX512VL-NEXT: vmovdqa {{LCPI.*}}(%rip), %xmm0 ## EVEX TO VEX Compression xmm0 = [2,9,4294967284,23] ; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] -; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value:
LCPI79_0-4, kind: reloc_riprel_4byte -; X64-AVX512VL-NEXT: vpsravd {{.*}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] -; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI79_1-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsravd {{LCPI.*}}(%rip), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte ; X64-AVX512VL-NEXT: retq ## encoding: [0xc3] %res = call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> , <4 x i32> ) ret <4 x i32> %res } declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) nounwind readnone + define <8 x i32> @test_x86_avx2_psrav_d_256(<8 x i32> %a0, <8 x i32> %a1) { ; AVX2-LABEL: test_x86_avx2_psrav_d_256: ; AVX2: ## %bb.0: @@ -1363,47 +1779,49 @@ ret <8 x i32> %res } + define <8 x i32> @test_x86_avx2_psrav_d_256_const(<8 x i32> %a0, <8 x i32> %a1) { ; X86-AVX-LABEL: test_x86_avx2_psrav_d_256_const: ; X86-AVX: ## %bb.0: -; X86-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] +; X86-AVX-NEXT: vmovdqa {{LCPI.*}}, %ymm0 ## ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; X86-AVX-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] -; X86-AVX-NEXT: ## fixup A - offset: 4, value: LCPI81_0, kind: FK_Data_4 -; X86-AVX-NEXT: vpsravd LCPI81_1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] -; X86-AVX-NEXT: ## fixup A - offset: 5, value: LCPI81_1, kind: FK_Data_4 +; X86-AVX-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX-NEXT: vpsravd {{LCPI.*}}, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] +; X86-AVX-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 ; X86-AVX-NEXT: retl ## encoding: [0xc3] ; ; X86-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const: ; X86-AVX512VL: ## %bb.0: -; X86-AVX512VL-NEXT: vmovdqa LCPI81_0, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] +; X86-AVX512VL-NEXT: vmovdqa {{LCPI.*}}, %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; X86-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] -; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI81_0, kind: FK_Data_4 -; X86-AVX512VL-NEXT: vpsravd LCPI81_1, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] -; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI81_1, kind: FK_Data_4 +; X86-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: FK_Data_4 +; X86-AVX512VL-NEXT: vpsravd {{LCPI.*}}, %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] +; X86-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: FK_Data_4 ; X86-AVX512VL-NEXT: retl ## encoding: [0xc3] ; ; X64-AVX-LABEL: test_x86_avx2_psrav_d_256_const: ; X64-AVX: ## %bb.0: -; X64-AVX-NEXT: vmovdqa {{.*#+}} ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] +; X64-AVX-NEXT: vmovdqa {{LCPI.*}}(%rip), %ymm0 ## ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; X64-AVX-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] -; X64-AVX-NEXT: ## fixup A - offset: 4, value: LCPI81_0-4, kind: reloc_riprel_4byte -; X64-AVX-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] -; X64-AVX-NEXT: ## fixup A - offset: 5, value: LCPI81_1-4, kind: reloc_riprel_4byte +; X64-AVX-NEXT: ## fixup 
A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX-NEXT: vpsravd {{LCPI.*}}(%rip), %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] +; X64-AVX-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte ; X64-AVX-NEXT: retq ## encoding: [0xc3] ; ; X64-AVX512VL-LABEL: test_x86_avx2_psrav_d_256_const: ; X64-AVX512VL: ## %bb.0: -; X64-AVX512VL-NEXT: vmovdqa {{.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] +; X64-AVX512VL-NEXT: vmovdqa {{LCPI.*}}(%rip), %ymm0 ## EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51] ; X64-AVX512VL-NEXT: ## encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A] -; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: LCPI81_0-4, kind: reloc_riprel_4byte -; X64-AVX512VL-NEXT: vpsravd {{.*}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] -; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: LCPI81_1-4, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: ## fixup A - offset: 4, value: {{LCPI.*}}, kind: reloc_riprel_4byte +; X64-AVX512VL-NEXT: vpsravd {{LCPI.*}}(%rip), %ymm0, %ymm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A] +; X64-AVX512VL-NEXT: ## fixup A - offset: 5, value: {{LCPI.*}}, kind: reloc_riprel_4byte ; X64-AVX512VL-NEXT: retq ## encoding: [0xc3] %res = call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> , <8 x i32> ) ret <8 x i32> %res } declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) nounwind readnone + define <2 x double> @test_x86_avx2_gather_d_pd(<2 x double> %a0, i8* %a1, <4 x i32> %idx, <2 x double> %mask) { ; X86-LABEL: test_x86_avx2_gather_d_pd: ; X86: ## %bb.0: Index: test/CodeGen/X86/avx512-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512-intrinsics.ll +++ test/CodeGen/X86/avx512-intrinsics.ll @@ -5226,6 +5226,22 @@ ret <16 x i32> %res } +define <16 x i32> @test_x86_avx512_psllv_d_512_const(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: test_x86_avx512_psllv_d_512_const: +; CHECK: ## %bb.0: +; CHECK-NEXT: vmovdqa64 {{LCPI.*}}(%rip), %zmm2 ## zmm2 = [2,9,0,4294967295,3,7,4294967295,0,4,5,4294967294,0,5,3,4294967293,0] +; CHECK-NEXT: vpsllvd {{LCPI.*}}(%rip), %zmm2, %zmm2 +; CHECK-NEXT: vmovdqa64 {{LCPI.*}}(%rip), %zmm3 ## zmm3 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4294967295] +; CHECK-NEXT: vpsllvd {{LCPI.*}}(%rip), %zmm3, %zmm3 +; CHECK: retq + %res0 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> , <16 x i32> ) + %res2 = add <16 x i32> %a0, %res0 + %res1 = call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> , <16 x i32> ) + %res3 = add <16 x i32> %a1, %res1 + %res4 = add <16 x i32> %res2, %res3 + ret <16 x i32> %res4 +} + define <16 x i32> @test_x86_avx512_mask_psllv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) { ; CHECK-LABEL: test_x86_avx512_mask_psllv_d_512: ; CHECK: ## %bb.0: @@ -5262,6 +5278,22 @@ ret <8 x i64> %res } +define <8 x i64> @test_x86_avx512_psllv_q_512_const(<8 x i64> %a0, <8 x i64> %a1) { +; CHECK-LABEL: test_x86_avx512_psllv_q_512_const: +; CHECK: ## %bb.0: +; CHECK-NEXT: vmovdqa64 {{LCPI.*}}(%rip), %zmm2 ## zmm2 = [2,9,0,18446744073709551615,3,7,18446744073709551615,0] +; CHECK-NEXT: vpsllvq {{LCPI.*}}(%rip), %zmm2, %zmm2 +; CHECK-NEXT: vmovdqa64 {{LCPI.*}}(%rip), %zmm3 ## zmm3 = [4,4,4,4,4,4,4,18446744073709551615] +; CHECK-NEXT: vpsllvq {{LCPI.*}}(%rip), %zmm3, %zmm3 +; CHECK: retq + %res0 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x 
i64> , <8 x i64> ) + %res2 = add <8 x i64> %a0, %res0 + %res1 = call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> , <8 x i64> ) + %res3 = add <8 x i64> %a1, %res1 + %res4 = add <8 x i64> %res2, %res3 + ret <8 x i64> %res4 +} + define <8 x i64> @test_x86_avx512_mask_psllv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) { ; CHECK-LABEL: test_x86_avx512_mask_psllv_q_512: ; CHECK: ## %bb.0: @@ -5370,6 +5402,22 @@ ret <16 x i32> %res } +define <16 x i32> @test_x86_avx512_psrlv_d_512_const(<16 x i32> %a0, <16 x i32> %a1) { +; CHECK-LABEL: test_x86_avx512_psrlv_d_512_const: +; CHECK: ## %bb.0: +; CHECK-NEXT: vmovdqa64 {{LCPI.*}}(%rip), %zmm2 ## zmm2 = [2,9,0,4294967295,3,7,4294967295,0,4,5,4294967294,0,5,3,4294967293,0] +; CHECK-NEXT: vpsrlvd {{LCPI.*}}(%rip), %zmm2, %zmm2 +; CHECK-NEXT: vmovdqa64 {{LCPI.*}}(%rip), %zmm3 ## zmm3 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4294967295] +; CHECK-NEXT: vpsrlvd {{LCPI.*}}(%rip), %zmm3, %zmm3 +; CHECK: retq + %res0 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> , <16 x i32> ) + %res2 = add <16 x i32> %a0, %res0 + %res1 = call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> , <16 x i32> ) + %res3 = add <16 x i32> %a1, %res1 + %res4 = add <16 x i32> %res2, %res3 + ret <16 x i32> %res4 +} + define <16 x i32> @test_x86_avx512_mask_psrlv_d_512(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) { ; CHECK-LABEL: test_x86_avx512_mask_psrlv_d_512: ; CHECK: ## %bb.0: @@ -5406,6 +5454,22 @@ ret <8 x i64> %res } +define <8 x i64> @test_x86_avx512_psrlv_q_512_const(<8 x i64> %a0, <8 x i64> %a1) { +; CHECK-LABEL: test_x86_avx512_psrlv_q_512_const: +; CHECK: ## %bb.0: +; CHECK-NEXT: vmovdqa64 {{LCPI.*}}(%rip), %zmm2 ## zmm2 = [2,9,0,18446744073709551615,3,7,18446744073709551615,0] +; CHECK-NEXT: vpsrlvq {{LCPI.*}}(%rip), %zmm2, %zmm2 +; CHECK-NEXT: vmovdqa64 {{LCPI.*}}(%rip), %zmm3 ## zmm3 = [4,4,4,4,4,4,4,18446744073709551615] +; CHECK-NEXT: vpsrlvq {{LCPI.*}}(%rip), %zmm3, %zmm3 +; CHECK: retq + %res0 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> , <8 x i64> ) + %res2 = add <8 x i64> %a0, %res0 + %res1 = call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> , <8 x i64> ) + %res3 = add <8 x i64> %a1, %res1 + %res4 = add <8 x i64> %res2, %res3 + ret <8 x i64> %res4 +} + define <8 x i64> @test_x86_avx512_mask_psrlv_q_512(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) { ; CHECK-LABEL: test_x86_avx512_mask_psrlv_q_512: ; CHECK: ## %bb.0: Index: test/CodeGen/X86/avx512bw-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512bw-intrinsics.ll +++ test/CodeGen/X86/avx512bw-intrinsics.ll @@ -1153,6 +1153,33 @@ ret <8 x i64> %res2 } +declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) nounwind readnone + +define <32 x i16> @test_x86_avx512_psrlv_w_512_const(<32 x i16> %x0, <32 x i16> %x1) optsize { +; X86-LABEL: test_x86_avx512_psrlv_w_512_const: +; X86: # %bb.0: +; X86-NEXT: vmovdqa64 {{\.LCPI.*}}, %zmm0 # zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535] +; X86-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: retl # encoding: [0xc3] + +; X64-LABEL: test_x86_avx512_psrlv_w_512_const: +; X64: # %bb.0: +; X64-NEXT: vmovdqa64 {{\.LCPI.*}}(%rip), %zmm0 # zmm0 = 
[4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535] +; X64-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: reloc_riprel_4byte +; X64-NEXT: vpsrlvw {{\.LCPI.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: reloc_riprel_4byte +; X64-NEXT: retq # encoding: [0xc3] + + %res1 = call <32 x i16> @llvm.x86.avx512.psrlv.w.512( + <32 x i16> , + <32 x i16> ) + ret <32 x i16> %res1 +} + declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32) define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) { @@ -1348,6 +1375,32 @@ declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind readnone +define <32 x i16> @test_x86_avx512_psllv_w_512_const(<32 x i16> %x0, <32 x i16> %x1) optsize { +; X86-LABEL: test_x86_avx512_psllv_w_512_const: +; X86: # %bb.0: +; X86-NEXT: vmovdqa64 {{\.LCPI.*}}, %zmm0 # zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535] +; X86-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: vpsllvw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: retl # encoding: [0xc3] + +; X64-LABEL: test_x86_avx512_psllv_w_512_const: +; X64: # %bb.0: +; X64-NEXT: vmovdqa64 {{\.LCPI.*}}(%rip), %zmm0 # zmm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535] +; X64-NEXT: # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: reloc_riprel_4byte +; X64-NEXT: vpsllvw {{\.LCPI.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: reloc_riprel_4byte +; X64-NEXT: retq # encoding: [0xc3] + + %res1 = call <32 x i16> @llvm.x86.avx512.psllv.w.512( + <32 x i16> , + <32 x i16> ) + ret <32 x i16> %res1 +} +declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) nounwind readnone + define <32 x i16> @test_x86_avx512_pslli_w_512(<32 x i16> %a0) { ; CHECK-LABEL: test_x86_avx512_pslli_w_512: ; CHECK: # %bb.0: Index: test/CodeGen/X86/avx512bwvl-intrinsics.ll =================================================================== --- test/CodeGen/X86/avx512bwvl-intrinsics.ll +++ test/CodeGen/X86/avx512bwvl-intrinsics.ll @@ -2014,6 +2014,59 @@ ret <8 x i16> %res4 } + +define <8 x i16> @test_int_x86_avx512_psrlv_w_128_const(<8 x i16> %x0, <8 x i16> %x1) optsize { +; X86-LABEL: test_int_x86_avx512_psrlv_w_128_const: +; X86: # %bb.0: +; X86-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535] +; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A] +; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4 +; X86-NEXT: retl # encoding: [0xc3] + +; X64-LABEL: test_int_x86_avx512_psrlv_w_128_const: +; X64: # %bb.0: +; X64-NEXT: vmovdqa {{\.LCPI.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535] +; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A] +; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, 
Index: test/CodeGen/X86/avx512bwvl-intrinsics.ll
===================================================================
--- test/CodeGen/X86/avx512bwvl-intrinsics.ll
+++ test/CodeGen/X86/avx512bwvl-intrinsics.ll
@@ -2014,6 +2014,59 @@
   ret <8 x i16> %res4
 }
+
+define <8 x i16> @test_int_x86_avx512_psrlv_w_128_const(<8 x i16> %x0, <8 x i16> %x1) optsize {
+; X86-LABEL: test_int_x86_avx512_psrlv_w_128_const:
+; X86: # %bb.0:
+; X86-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
+; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: retl # encoding: [0xc3]
+
+; X64-LABEL: test_int_x86_avx512_psrlv_w_128_const:
+; X64: # %bb.0:
+; X64-NEXT: vmovdqa {{\.LCPI.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
+; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: reloc_riprel_4byte
+; X64-NEXT: vpsrlvw {{\.LCPI.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x10,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: reloc_riprel_4byte
+; X64-NEXT: retq # encoding: [0xc3]
+  %res = call <8 x i16> @llvm.x86.avx512.psrlv.w.128(
+                 <8 x i16> ,
+                 <8 x i16> )
+  ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>)
+
+define <16 x i16> @test_int_x86_avx512_psrlv_w_256_const(<16 x i16> %x0, <16 x i16> %x1) optsize {
+; X86-LABEL: test_int_x86_avx512_psrlv_w_256_const:
+; X86: # %bb.0:
+; X86-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vpsrlvw {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: retl # encoding: [0xc3]
+
+; X64-LABEL: test_int_x86_avx512_psrlv_w_256_const:
+; X64: # %bb.0:
+; X64-NEXT: vmovdqa {{\.LCPI.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: reloc_riprel_4byte
+; X64-NEXT: vpsrlvw {{\.LCPI.*}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x10,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: reloc_riprel_4byte
+; X64-NEXT: retq # encoding: [0xc3]
+  %res = call <16 x i16> @llvm.x86.avx512.psrlv.w.256(
+                 <16 x i16> ,
+                 <16 x i16> )
+  ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>)
+
 declare <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
 
 define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
@@ -2136,6 +2189,61 @@
   ret <8 x i16> %res4
 }
 
+define <8 x i16> @test_int_x86_avx512_psllv_w_128_const(<8 x i16> %x0, <8 x i16> %x1) optsize {
+; X86-LABEL: test_int_x86_avx512_psllv_w_128_const:
+; X86: # %bb.0:
+; X86-NEXT: vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
+; X86-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vpsllvw {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: retl # encoding: [0xc3]
+
+; X64-LABEL: test_int_x86_avx512_psllv_w_128_const:
+; X64: # %bb.0:
+; X64-NEXT: vmovdqa {{\.LCPI.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [4,4,4,4,4,4,4,65535]
+; X64-NEXT: # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: reloc_riprel_4byte
+; X64-NEXT: vpsllvw {{\.LCPI.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x12,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: reloc_riprel_4byte
+; X64-NEXT: retq # encoding: [0xc3]
+  %res = call <8 x i16> @llvm.x86.avx512.psllv.w.128(
+                 <8 x i16> ,
+                 <8 x i16> )
+  ret <8 x i16> %res
+}
+
+declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>)
+
+define <16 x i16> @test_int_x86_avx512_psllv_w_256_const(<16 x i16> %x0, <16 x i16> %x1) optsize {
+; X86-LABEL: test_int_x86_avx512_psllv_w_256_const:
+; X86: # %bb.0:
+; X86-NEXT: vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X86-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: vpsllvw {{\.LCPI.*}}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
+; X86-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
+; X86-NEXT: retl # encoding: [0xc3]
+
+; X64-LABEL: test_int_x86_avx512_psllv_w_256_const:
+; X64: # %bb.0:
+; X64-NEXT: vmovdqa {{\.LCPI.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,65535]
+; X64-NEXT: # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: reloc_riprel_4byte
+; X64-NEXT: vpsllvw {{\.LCPI.*}}(%rip), %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x28,0x12,0x05,A,A,A,A]
+; X64-NEXT: # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: reloc_riprel_4byte
+; X64-NEXT: retq # encoding: [0xc3]
+  %res = call <16 x i16> @llvm.x86.avx512.psllv.w.256(
+                 <16 x i16> ,
+                 <16 x i16> )
+  ret <16 x i16> %res
+}
+
+declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>)
+
 declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)
 
 define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {