diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1671,6 +1671,16 @@ // low bits known zero. Known.Zero.setLowBits(ShAmt); + // Attempt to avoid multi-use ops if we don't need anything from them. + if (!InDemandedMask.isAllOnesValue() || !DemandedElts.isAllOnesValue()) { + SDValue DemandedOp0 = SimplifyMultipleUseDemandedBits( + Op0, InDemandedMask, DemandedElts, TLO.DAG, Depth + 1); + if (DemandedOp0) { + SDValue NewOp = TLO.DAG.getNode(ISD::SHL, dl, VT, DemandedOp0, Op1); + return TLO.CombineTo(Op, NewOp); + } + } + // Try shrinking the operation as long as the shift amount will still be // in range. if ((ShAmt < DemandedBits.getActiveBits()) && diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll --- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll +++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll @@ -5240,77 +5240,73 @@ ; GFX6-NEXT: s_mov_b32 s7, 0xf000 ; GFX6-NEXT: s_mov_b32 s6, -1 ; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_bfe_i32 s10, s2, 0xf0000 +; GFX6-NEXT: v_cvt_f32_i32_e32 v5, s10 +; GFX6-NEXT: v_mov_b32_e32 v2, s0 +; GFX6-NEXT: v_alignbit_b32 v2, s1, v2, 30 +; GFX6-NEXT: s_bfe_i32 s1, s0, 0xf0000 +; GFX6-NEXT: v_cvt_f32_i32_e32 v4, s1 +; GFX6-NEXT: s_xor_b32 s1, s10, s1 +; GFX6-NEXT: s_ashr_i32 s1, s1, 30 +; GFX6-NEXT: s_or_b32 s1, s1, 1 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v6, v4 +; GFX6-NEXT: v_mov_b32_e32 v7, s1 +; GFX6-NEXT: s_lshr_b32 s9, s0, 15 +; GFX6-NEXT: s_bfe_i32 s1, s2, 0xf000f +; GFX6-NEXT: v_mul_f32_e32 v6, v5, v6 +; GFX6-NEXT: v_trunc_f32_e32 v6, v6 +; GFX6-NEXT: v_mad_f32 v5, -v6, v4, v5 +; GFX6-NEXT: v_cvt_i32_f32_e32 v6, v6 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v5|, |v4| +; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v7, vcc ; GFX6-NEXT: v_mov_b32_e32 v0, s2 +; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v6 +; GFX6-NEXT: v_mul_lo_u32 v4, v4, s0 +; GFX6-NEXT: s_bfe_i32 s0, s0, 0xf000f +; GFX6-NEXT: v_cvt_f32_i32_e32 v5, s0 +; GFX6-NEXT: v_cvt_f32_i32_e32 v6, s1 ; GFX6-NEXT: v_alignbit_b32 v0, s3, v0, 30 ; GFX6-NEXT: s_movk_i32 s3, 0x7fff -; GFX6-NEXT: s_and_b32 s11, s0, s3 -; GFX6-NEXT: s_bfe_i32 s11, s11, 0xf0000 -; GFX6-NEXT: v_cvt_f32_i32_e32 v2, s11 -; GFX6-NEXT: s_and_b32 s9, s2, s3 -; GFX6-NEXT: s_bfe_i32 s9, s9, 0xf0000 -; GFX6-NEXT: v_cvt_f32_i32_e32 v3, s9 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v4, v2 -; GFX6-NEXT: s_xor_b32 s9, s9, s11 -; GFX6-NEXT: s_ashr_i32 s9, s9, 30 -; GFX6-NEXT: s_or_b32 s9, s9, 1 -; GFX6-NEXT: v_mul_f32_e32 v4, v3, v4 -; GFX6-NEXT: v_trunc_f32_e32 v4, v4 -; GFX6-NEXT: v_mad_f32 v3, -v4, v2, v3 -; GFX6-NEXT: v_cvt_i32_f32_e32 v4, v4 -; GFX6-NEXT: v_mov_b32_e32 v5, s9 -; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v3|, |v2| -; GFX6-NEXT: v_cndmask_b32_e32 v2, 0, v5, vcc -; GFX6-NEXT: v_mov_b32_e32 v1, s0 -; GFX6-NEXT: s_bfe_u32 s12, s0, 0xf000f -; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4 -; GFX6-NEXT: v_alignbit_b32 v1, s1, v1, 30 -; GFX6-NEXT: s_lshr_b32 s1, s0, 15 -; GFX6-NEXT: v_mul_lo_u32 v2, v2, s0 -; GFX6-NEXT: s_bfe_i32 s0, s12, 0xf0000 -; GFX6-NEXT: v_cvt_f32_i32_e32 v3, s0 -; GFX6-NEXT: s_bfe_u32 s10, s2, 0xf000f -; GFX6-NEXT: s_lshr_b32 s8, s2, 15 -; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s2, v2 -; GFX6-NEXT: s_bfe_i32 s2, s10, 0xf0000 -; GFX6-NEXT: v_cvt_f32_i32_e32 v4, s2 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v5, v3 -; GFX6-NEXT: s_xor_b32 s0, s2, s0 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v7, v5 +; GFX6-NEXT: v_and_b32_e32 v3, s3, v2 +; GFX6-NEXT: v_sub_i32_e32 v4, vcc, s2, v4 +; GFX6-NEXT: v_mul_f32_e32 v7, v6, v7 +; GFX6-NEXT: v_trunc_f32_e32 v7, v7 +; GFX6-NEXT: s_xor_b32 s0, s1, s0 +; GFX6-NEXT: v_mad_f32 v6, -v7, v5, v6 +; GFX6-NEXT: v_bfe_i32 v2, v2, 0, 15 ; GFX6-NEXT: s_ashr_i32 s0, s0, 30 +; GFX6-NEXT: v_cvt_i32_f32_e32 v7, v7 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v6|, |v5| +; GFX6-NEXT: v_cvt_f32_i32_e32 v6, v2 ; GFX6-NEXT: s_or_b32 s0, s0, 1 -; GFX6-NEXT: v_mul_f32_e32 v5, v4, v5 -; GFX6-NEXT: v_trunc_f32_e32 v5, v5 -; GFX6-NEXT: v_mad_f32 v4, -v5, v3, v4 -; GFX6-NEXT: v_cvt_i32_f32_e32 v5, v5 -; GFX6-NEXT: v_and_b32_e32 v1, s3, v1 -; GFX6-NEXT: v_mov_b32_e32 v6, s0 -; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v4|, |v3| -; GFX6-NEXT: v_cndmask_b32_e32 v3, 0, v6, vcc -; GFX6-NEXT: v_bfe_i32 v4, v1, 0, 15 -; GFX6-NEXT: v_add_i32_e32 v3, vcc, v3, v5 -; GFX6-NEXT: v_cvt_f32_i32_e32 v5, v4 -; GFX6-NEXT: v_and_b32_e32 v0, s3, v0 -; GFX6-NEXT: v_bfe_i32 v6, v0, 0, 15 -; GFX6-NEXT: v_cvt_f32_i32_e32 v7, v6 -; GFX6-NEXT: v_rcp_iflag_f32_e32 v8, v5 -; GFX6-NEXT: v_xor_b32_e32 v4, v6, v4 -; GFX6-NEXT: v_ashrrev_i32_e32 v4, 30, v4 -; GFX6-NEXT: v_or_b32_e32 v4, 1, v4 -; GFX6-NEXT: v_mul_f32_e32 v6, v7, v8 -; GFX6-NEXT: v_trunc_f32_e32 v6, v6 -; GFX6-NEXT: v_mad_f32 v7, -v6, v5, v7 -; GFX6-NEXT: v_cvt_i32_f32_e32 v6, v6 -; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v7|, |v5| -; GFX6-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX6-NEXT: v_mul_lo_u32 v3, v3, s1 -; GFX6-NEXT: v_add_i32_e32 v4, vcc, v4, v6 -; GFX6-NEXT: v_mul_lo_u32 v1, v4, v1 -; GFX6-NEXT: v_sub_i32_e32 v3, vcc, s8, v3 -; GFX6-NEXT: v_and_b32_e32 v3, s3, v3 -; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, v1, v0 -; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 30 +; GFX6-NEXT: v_mov_b32_e32 v8, s0 +; GFX6-NEXT: v_and_b32_e32 v1, s3, v0 +; GFX6-NEXT: v_cndmask_b32_e32 v5, 0, v8, vcc +; GFX6-NEXT: v_bfe_i32 v0, v0, 0, 15 +; GFX6-NEXT: v_add_i32_e32 v5, vcc, v5, v7 +; GFX6-NEXT: v_cvt_f32_i32_e32 v7, v0 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v8, v6 +; GFX6-NEXT: v_xor_b32_e32 v0, v0, v2 +; GFX6-NEXT: v_ashrrev_i32_e32 v0, 30, v0 +; GFX6-NEXT: v_or_b32_e32 v0, 1, v0 +; GFX6-NEXT: v_mul_f32_e32 v2, v7, v8 +; GFX6-NEXT: v_trunc_f32_e32 v2, v2 +; GFX6-NEXT: v_mad_f32 v7, -v2, v6, v7 +; GFX6-NEXT: v_cvt_i32_f32_e32 v2, v2 +; GFX6-NEXT: v_cmp_ge_f32_e64 vcc, |v7|, |v6| +; GFX6-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc +; GFX6-NEXT: v_mul_lo_u32 v5, v5, s9 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v2 +; GFX6-NEXT: v_mul_lo_u32 v0, v0, v3 +; GFX6-NEXT: s_lshr_b32 s8, s2, 15 +; GFX6-NEXT: v_sub_i32_e32 v2, vcc, s8, v5 +; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, v0, v1 ; GFX6-NEXT: v_and_b32_e32 v2, s3, v2 -; GFX6-NEXT: v_lshlrev_b32_e32 v3, 15, v3 -; GFX6-NEXT: v_or_b32_e32 v2, v2, v3 +; GFX6-NEXT: v_lshl_b64 v[0:1], v[0:1], 30 +; GFX6-NEXT: v_and_b32_e32 v3, s3, v4 +; GFX6-NEXT: v_lshlrev_b32_e32 v2, 15, v2 +; GFX6-NEXT: v_or_b32_e32 v2, v3, v2 ; GFX6-NEXT: v_or_b32_e32 v0, v2, v0 ; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 ; GFX6-NEXT: s_waitcnt expcnt(0) @@ -5324,82 +5320,78 @@ ; GFX9-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x24 ; GFX9-NEXT: s_movk_i32 s8, 0x7fff ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x34 +; GFX9-NEXT: v_mov_b32_e32 v2, 0 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) +; GFX9-NEXT: s_bfe_i32 s6, s2, 0xf0000 +; GFX9-NEXT: v_cvt_f32_i32_e32 v5, s6 ; GFX9-NEXT: v_mov_b32_e32 v0, s2 -; GFX9-NEXT: v_alignbit_b32 v0, s3, v0, 30 -; GFX9-NEXT: s_and_b32 s3, s2, s8 ; GFX9-NEXT: v_mov_b32_e32 v1, s0 ; GFX9-NEXT: v_alignbit_b32 v1, s1, v1, 30 -; GFX9-NEXT: s_and_b32 s1, s0, s8 -; GFX9-NEXT: s_bfe_i32 s1, s1, 0xf0000 -; GFX9-NEXT: v_cvt_f32_i32_e32 v2, s1 -; GFX9-NEXT: s_bfe_i32 s3, s3, 0xf0000 -; GFX9-NEXT: v_cvt_f32_i32_e32 v3, s3 -; GFX9-NEXT: s_xor_b32 s1, s3, s1 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v4, v2 +; GFX9-NEXT: s_bfe_i32 s1, s0, 0xf0000 +; GFX9-NEXT: v_cvt_f32_i32_e32 v4, s1 +; GFX9-NEXT: s_xor_b32 s1, s6, s1 ; GFX9-NEXT: s_ashr_i32 s1, s1, 30 -; GFX9-NEXT: s_lshr_b32 s9, s2, 15 -; GFX9-NEXT: s_bfe_u32 s10, s2, 0xf000f -; GFX9-NEXT: v_mul_f32_e32 v4, v3, v4 -; GFX9-NEXT: v_trunc_f32_e32 v4, v4 -; GFX9-NEXT: v_mad_f32 v3, -v4, v2, v3 -; GFX9-NEXT: v_cvt_i32_f32_e32 v4, v4 -; GFX9-NEXT: s_lshr_b32 s11, s0, 15 -; GFX9-NEXT: s_bfe_u32 s12, s0, 0xf000f +; GFX9-NEXT: v_alignbit_b32 v0, s3, v0, 30 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v6, v4 +; GFX9-NEXT: s_lshr_b32 s3, s2, 15 +; GFX9-NEXT: s_lshr_b32 s9, s0, 15 ; GFX9-NEXT: s_or_b32 s1, s1, 1 -; GFX9-NEXT: v_cmp_ge_f32_e64 s[6:7], |v3|, |v2| +; GFX9-NEXT: v_mul_f32_e32 v6, v5, v6 +; GFX9-NEXT: v_trunc_f32_e32 v6, v6 +; GFX9-NEXT: v_mad_f32 v5, -v6, v4, v5 +; GFX9-NEXT: v_cvt_i32_f32_e32 v6, v6 +; GFX9-NEXT: v_cmp_ge_f32_e64 s[6:7], |v5|, |v4| ; GFX9-NEXT: s_and_b64 s[6:7], s[6:7], exec ; GFX9-NEXT: s_cselect_b32 s1, s1, 0 -; GFX9-NEXT: v_add_u32_e32 v2, s1, v4 -; GFX9-NEXT: s_bfe_i32 s1, s12, 0xf0000 -; GFX9-NEXT: v_cvt_f32_i32_e32 v3, s1 -; GFX9-NEXT: v_mul_lo_u32 v2, v2, s0 -; GFX9-NEXT: s_bfe_i32 s0, s10, 0xf0000 -; GFX9-NEXT: v_cvt_f32_i32_e32 v4, s0 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v5, v3 +; GFX9-NEXT: v_add_u32_e32 v4, s1, v6 +; GFX9-NEXT: s_bfe_i32 s1, s0, 0xf000f +; GFX9-NEXT: v_cvt_f32_i32_e32 v5, s1 +; GFX9-NEXT: v_mul_lo_u32 v4, v4, s0 +; GFX9-NEXT: s_bfe_i32 s0, s2, 0xf000f +; GFX9-NEXT: v_cvt_f32_i32_e32 v6, s0 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v7, v5 ; GFX9-NEXT: s_xor_b32 s0, s0, s1 +; GFX9-NEXT: v_and_b32_e32 v3, s8, v1 ; GFX9-NEXT: s_ashr_i32 s0, s0, 30 -; GFX9-NEXT: s_or_b32 s3, s0, 1 -; GFX9-NEXT: v_mul_f32_e32 v5, v4, v5 -; GFX9-NEXT: v_trunc_f32_e32 v5, v5 -; GFX9-NEXT: v_mad_f32 v4, -v5, v3, v4 -; GFX9-NEXT: v_cvt_i32_f32_e32 v5, v5 -; GFX9-NEXT: v_cmp_ge_f32_e64 s[0:1], |v4|, |v3| -; GFX9-NEXT: v_and_b32_e32 v1, s8, v1 +; GFX9-NEXT: v_mul_f32_e32 v7, v6, v7 +; GFX9-NEXT: v_trunc_f32_e32 v7, v7 +; GFX9-NEXT: v_mad_f32 v6, -v7, v5, v6 +; GFX9-NEXT: v_cvt_i32_f32_e32 v7, v7 +; GFX9-NEXT: v_bfe_i32 v1, v1, 0, 15 +; GFX9-NEXT: s_or_b32 s6, s0, 1 +; GFX9-NEXT: v_cmp_ge_f32_e64 s[0:1], |v6|, |v5| +; GFX9-NEXT: v_cvt_f32_i32_e32 v6, v1 ; GFX9-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GFX9-NEXT: s_cselect_b32 s0, s3, 0 -; GFX9-NEXT: v_bfe_i32 v4, v1, 0, 15 -; GFX9-NEXT: v_add_u32_e32 v3, s0, v5 -; GFX9-NEXT: v_cvt_f32_i32_e32 v5, v4 +; GFX9-NEXT: s_cselect_b32 s0, s6, 0 +; GFX9-NEXT: v_add_u32_e32 v5, s0, v7 +; GFX9-NEXT: v_bfe_i32 v7, v0, 0, 15 +; GFX9-NEXT: v_cvt_f32_i32_e32 v8, v7 +; GFX9-NEXT: v_rcp_iflag_f32_e32 v9, v6 +; GFX9-NEXT: v_xor_b32_e32 v1, v7, v1 +; GFX9-NEXT: v_ashrrev_i32_e32 v1, 30, v1 +; GFX9-NEXT: v_or_b32_e32 v1, 1, v1 +; GFX9-NEXT: v_mul_f32_e32 v7, v8, v9 +; GFX9-NEXT: v_trunc_f32_e32 v7, v7 +; GFX9-NEXT: v_cvt_i32_f32_e32 v9, v7 +; GFX9-NEXT: v_mad_f32 v7, -v7, v6, v8 +; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v7|, |v6| +; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: v_mul_lo_u32 v5, v5, s9 +; GFX9-NEXT: v_add_u32_e32 v1, v9, v1 +; GFX9-NEXT: v_mul_lo_u32 v1, v1, v3 ; GFX9-NEXT: v_and_b32_e32 v0, s8, v0 -; GFX9-NEXT: v_bfe_i32 v6, v0, 0, 15 -; GFX9-NEXT: v_cvt_f32_i32_e32 v7, v6 -; GFX9-NEXT: v_rcp_iflag_f32_e32 v8, v5 -; GFX9-NEXT: v_xor_b32_e32 v4, v6, v4 -; GFX9-NEXT: v_ashrrev_i32_e32 v4, 30, v4 -; GFX9-NEXT: v_or_b32_e32 v4, 1, v4 -; GFX9-NEXT: v_mul_f32_e32 v6, v7, v8 -; GFX9-NEXT: v_trunc_f32_e32 v6, v6 -; GFX9-NEXT: v_cvt_i32_f32_e32 v8, v6 -; GFX9-NEXT: v_mad_f32 v6, -v6, v5, v7 -; GFX9-NEXT: v_cmp_ge_f32_e64 vcc, |v6|, |v5| -; GFX9-NEXT: v_cndmask_b32_e32 v4, 0, v4, vcc -; GFX9-NEXT: v_mul_lo_u32 v3, v3, s11 -; GFX9-NEXT: v_add_u32_e32 v4, v8, v4 -; GFX9-NEXT: v_mul_lo_u32 v1, v4, v1 -; GFX9-NEXT: v_sub_u32_e32 v2, s2, v2 -; GFX9-NEXT: v_sub_u32_e32 v3, s9, v3 -; GFX9-NEXT: v_and_b32_e32 v3, s8, v3 +; GFX9-NEXT: v_sub_u32_e32 v3, s2, v4 +; GFX9-NEXT: v_sub_u32_e32 v4, s3, v5 ; GFX9-NEXT: v_sub_u32_e32 v0, v0, v1 +; GFX9-NEXT: v_and_b32_e32 v4, s8, v4 ; GFX9-NEXT: v_lshlrev_b64 v[0:1], 30, v[0:1] -; GFX9-NEXT: v_and_b32_e32 v2, s8, v2 -; GFX9-NEXT: v_lshlrev_b32_e32 v3, 15, v3 -; GFX9-NEXT: v_or_b32_e32 v2, v2, v3 -; GFX9-NEXT: v_mov_b32_e32 v4, 0 -; GFX9-NEXT: v_or_b32_e32 v0, v2, v0 -; GFX9-NEXT: global_store_dword v4, v0, s[4:5] +; GFX9-NEXT: v_and_b32_e32 v3, s8, v3 +; GFX9-NEXT: v_lshlrev_b32_e32 v4, 15, v4 +; GFX9-NEXT: v_or_b32_e32 v3, v3, v4 +; GFX9-NEXT: v_or_b32_e32 v0, v3, v0 +; GFX9-NEXT: global_store_dword v2, v0, s[4:5] ; GFX9-NEXT: v_and_b32_e32 v0, 0x1fff, v1 -; GFX9-NEXT: global_store_short v4, v0, s[4:5] offset:4 +; GFX9-NEXT: global_store_short v2, v0, s[4:5] offset:4 ; GFX9-NEXT: s_endpgm %r = srem <3 x i15> %x, %y store <3 x i15> %r, <3 x i15> addrspace(1)* %out diff --git a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll --- a/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll +++ b/llvm/test/CodeGen/AMDGPU/insert_vector_elt.v2i16.ll @@ -1605,10 +1605,10 @@ ; VI-NEXT: flat_load_dwordx2 v[0:1], v[0:1] ; VI-NEXT: s_mov_b64 s[2:3], 0xffff ; VI-NEXT: v_mov_b32_e32 v3, s1 -; VI-NEXT: s_and_b32 s1, s4, s2 +; VI-NEXT: s_lshl_b32 s1, s4, 16 +; VI-NEXT: s_and_b32 s4, s4, s2 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 -; VI-NEXT: s_lshl_b32 s0, s1, 16 -; VI-NEXT: s_or_b32 s0, s1, s0 +; VI-NEXT: s_or_b32 s0, s4, s1 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc ; VI-NEXT: v_lshlrev_b32_e32 v4, 4, v4 ; VI-NEXT: v_lshlrev_b64 v[4:5], v4, s[2:3] @@ -1690,11 +1690,11 @@ ; VI-NEXT: s_mov_b64 s[2:3], 0xffff ; VI-NEXT: v_mov_b32_e32 v3, s1 ; VI-NEXT: s_lshl_b32 s1, s5, 4 +; VI-NEXT: s_lshl_b32 s5, s4, 16 ; VI-NEXT: s_and_b32 s4, s4, s2 ; VI-NEXT: v_add_u32_e32 v2, vcc, s0, v2 ; VI-NEXT: s_lshl_b64 s[0:1], s[2:3], s1 -; VI-NEXT: s_lshl_b32 s2, s4, 16 -; VI-NEXT: s_or_b32 s2, s4, s2 +; VI-NEXT: s_or_b32 s2, s4, s5 ; VI-NEXT: v_mov_b32_e32 v4, s2 ; VI-NEXT: v_mov_b32_e32 v5, s2 ; VI-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc diff --git a/llvm/test/CodeGen/Mips/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/Mips/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/Mips/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/Mips/urem-seteq-illegal-types.ll @@ -148,48 +148,48 @@ define i1 @test_urem_oversized(i66 %X) nounwind { ; MIPSEL-LABEL: test_urem_oversized: ; MIPSEL: # %bb.0: -; MIPSEL-NEXT: lui $1, 12057 -; MIPSEL-NEXT: ori $1, $1, 37186 +; MIPSEL-NEXT: lui $1, 52741 +; MIPSEL-NEXT: ori $1, $1, 40665 ; MIPSEL-NEXT: multu $6, $1 -; MIPSEL-NEXT: mflo $1 ; MIPSEL-NEXT: mfhi $2 -; MIPSEL-NEXT: lui $3, 52741 -; MIPSEL-NEXT: ori $3, $3, 40665 -; MIPSEL-NEXT: multu $6, $3 -; MIPSEL-NEXT: mflo $7 -; MIPSEL-NEXT: mfhi $8 -; MIPSEL-NEXT: multu $5, $3 -; MIPSEL-NEXT: mfhi $9 +; MIPSEL-NEXT: mflo $3 +; MIPSEL-NEXT: multu $5, $1 +; MIPSEL-NEXT: mfhi $7 +; MIPSEL-NEXT: mflo $8 +; MIPSEL-NEXT: lui $9, 12057 +; MIPSEL-NEXT: ori $9, $9, 37186 +; MIPSEL-NEXT: multu $6, $9 ; MIPSEL-NEXT: mflo $10 -; MIPSEL-NEXT: addu $8, $10, $8 -; MIPSEL-NEXT: addu $11, $1, $8 -; MIPSEL-NEXT: sltu $8, $8, $10 -; MIPSEL-NEXT: sll $10, $11, 31 -; MIPSEL-NEXT: sltu $1, $11, $1 -; MIPSEL-NEXT: srl $12, $7, 1 -; MIPSEL-NEXT: sll $7, $7, 1 -; MIPSEL-NEXT: addu $1, $2, $1 -; MIPSEL-NEXT: or $10, $12, $10 -; MIPSEL-NEXT: srl $2, $11, 1 -; MIPSEL-NEXT: addu $8, $9, $8 -; MIPSEL-NEXT: mul $3, $4, $3 -; MIPSEL-NEXT: sll $4, $6, 1 -; MIPSEL-NEXT: sll $5, $5, 1 +; MIPSEL-NEXT: mfhi $11 +; MIPSEL-NEXT: addu $2, $8, $2 +; MIPSEL-NEXT: addu $12, $10, $2 +; MIPSEL-NEXT: sltu $2, $2, $8 +; MIPSEL-NEXT: addu $2, $7, $2 +; MIPSEL-NEXT: sltu $7, $12, $10 +; MIPSEL-NEXT: sll $8, $12, 31 +; MIPSEL-NEXT: srl $10, $12, 1 +; MIPSEL-NEXT: sll $12, $3, 1 +; MIPSEL-NEXT: srl $3, $3, 1 +; MIPSEL-NEXT: mul $1, $4, $1 +; MIPSEL-NEXT: mul $4, $5, $9 +; MIPSEL-NEXT: sll $5, $6, 1 ; MIPSEL-NEXT: lui $6, 60010 -; MIPSEL-NEXT: ori $6, $6, 61135 -; MIPSEL-NEXT: addu $1, $8, $1 -; MIPSEL-NEXT: subu $1, $1, $5 -; MIPSEL-NEXT: addu $3, $4, $3 -; MIPSEL-NEXT: addu $1, $1, $3 -; MIPSEL-NEXT: andi $1, $1, 3 -; MIPSEL-NEXT: sll $3, $1, 31 -; MIPSEL-NEXT: or $3, $2, $3 -; MIPSEL-NEXT: sltiu $2, $3, 13 -; MIPSEL-NEXT: xori $3, $3, 13 -; MIPSEL-NEXT: sltu $4, $10, $6 -; MIPSEL-NEXT: movz $2, $4, $3 +; MIPSEL-NEXT: addu $7, $11, $7 +; MIPSEL-NEXT: addu $2, $2, $7 +; MIPSEL-NEXT: addu $2, $4, $2 +; MIPSEL-NEXT: addu $1, $5, $1 +; MIPSEL-NEXT: addu $1, $2, $1 +; MIPSEL-NEXT: sll $2, $1, 31 +; MIPSEL-NEXT: or $4, $10, $2 +; MIPSEL-NEXT: sltiu $2, $4, 13 +; MIPSEL-NEXT: xori $4, $4, 13 +; MIPSEL-NEXT: or $3, $3, $8 +; MIPSEL-NEXT: ori $5, $6, 61135 +; MIPSEL-NEXT: sltu $3, $3, $5 +; MIPSEL-NEXT: movz $2, $3, $4 +; MIPSEL-NEXT: andi $1, $1, 2 ; MIPSEL-NEXT: srl $1, $1, 1 -; MIPSEL-NEXT: or $1, $1, $7 +; MIPSEL-NEXT: or $1, $1, $12 ; MIPSEL-NEXT: andi $1, $1, 3 ; MIPSEL-NEXT: jr $ra ; MIPSEL-NEXT: movn $2, $zero, $1 @@ -213,12 +213,12 @@ ; MIPS64EL-NEXT: daddiu $5, $5, -4401 ; MIPS64EL-NEXT: dsll $4, $4, 1 ; MIPS64EL-NEXT: daddu $3, $3, $4 -; MIPS64EL-NEXT: daddu $2, $3, $2 -; MIPS64EL-NEXT: andi $3, $2, 3 +; MIPS64EL-NEXT: daddu $3, $3, $2 ; MIPS64EL-NEXT: dsll $2, $3, 63 ; MIPS64EL-NEXT: dsrl $4, $1, 1 ; MIPS64EL-NEXT: or $2, $4, $2 ; MIPS64EL-NEXT: sltu $2, $2, $5 +; MIPS64EL-NEXT: andi $3, $3, 2 ; MIPS64EL-NEXT: dsrl $3, $3, 1 ; MIPS64EL-NEXT: dsll $1, $1, 1 ; MIPS64EL-NEXT: or $1, $3, $1 diff --git a/llvm/test/CodeGen/RISCV/rv32zbp-zbkb.ll b/llvm/test/CodeGen/RISCV/rv32zbp-zbkb.ll --- a/llvm/test/CodeGen/RISCV/rv32zbp-zbkb.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbp-zbkb.ll @@ -14,6 +14,11 @@ ; RV32I-NEXT: slli a1, a1, 16 ; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret +; +; RV32ZBP-ZBKB-LABEL: pack_i32: +; RV32ZBP-ZBKB: # %bb.0: +; RV32ZBP-ZBKB-NEXT: pack a0, a0, a1 +; RV32ZBP-ZBKB-NEXT: ret %shl = and i32 %a, 65535 %shl1 = shl i32 %b, 16 %or = or i32 %shl1, %shl diff --git a/llvm/test/CodeGen/RISCV/rv32zbp.ll b/llvm/test/CodeGen/RISCV/rv32zbp.ll --- a/llvm/test/CodeGen/RISCV/rv32zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv32zbp.ll @@ -1034,18 +1034,30 @@ ; RV32I-NEXT: addi a4, a4, 819 ; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: or a0, a3, a0 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: slli a1, a0, 2 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: srli a2, a0, 2 +; RV32I-NEXT: or a1, a0, a1 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: and a0, a0, a2 +; RV32I-NEXT: srli a2, a1, 2 ; RV32I-NEXT: and a2, a2, a4 -; RV32I-NEXT: or a0, a2, a0 -; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: or a1, a2, a1 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc2b_i32: ; RV32ZBP: # %bb.0: +; RV32ZBP-NEXT: srli a1, a0, 2 +; RV32ZBP-NEXT: or a1, a1, a0 ; RV32ZBP-NEXT: orc2.n a0, a0 +; RV32ZBP-NEXT: slli a1, a1, 2 +; RV32ZBP-NEXT: lui a2, 838861 +; RV32ZBP-NEXT: addi a2, a2, -820 +; RV32ZBP-NEXT: and a1, a1, a2 +; RV32ZBP-NEXT: srli a2, a0, 2 +; RV32ZBP-NEXT: lui a3, 209715 +; RV32ZBP-NEXT: addi a3, a3, 819 +; RV32ZBP-NEXT: and a2, a2, a3 +; RV32ZBP-NEXT: or a0, a2, a0 +; RV32ZBP-NEXT: or a0, a0, a1 ; RV32ZBP-NEXT: ret %and1 = shl i32 %a, 2 %shl1 = and i32 %and1, -858993460 @@ -1066,40 +1078,61 @@ define i64 @gorc2b_i64(i64 %a) nounwind { ; RV32I-LABEL: gorc2b_i64: ; RV32I: # %bb.0: -; RV32I-NEXT: slli a2, a1, 2 -; RV32I-NEXT: slli a3, a0, 2 +; RV32I-NEXT: slli a2, a0, 2 +; RV32I-NEXT: slli a3, a1, 2 ; RV32I-NEXT: lui a4, 838861 ; RV32I-NEXT: addi a4, a4, -820 ; RV32I-NEXT: and a3, a3, a4 ; RV32I-NEXT: and a2, a2, a4 -; RV32I-NEXT: srli a5, a0, 2 -; RV32I-NEXT: srli a6, a1, 2 +; RV32I-NEXT: srli a5, a1, 2 +; RV32I-NEXT: srli a6, a0, 2 ; RV32I-NEXT: lui a7, 209715 ; RV32I-NEXT: addi a7, a7, 819 ; RV32I-NEXT: and a6, a6, a7 ; RV32I-NEXT: and a5, a5, a7 -; RV32I-NEXT: or a0, a5, a0 -; RV32I-NEXT: or a1, a6, a1 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: or a0, a0, a3 -; RV32I-NEXT: slli a2, a0, 2 -; RV32I-NEXT: slli a3, a1, 2 -; RV32I-NEXT: and a3, a3, a4 -; RV32I-NEXT: and a2, a2, a4 -; RV32I-NEXT: srli a4, a1, 2 -; RV32I-NEXT: srli a5, a0, 2 +; RV32I-NEXT: or a1, a5, a1 +; RV32I-NEXT: or a0, a6, a0 +; RV32I-NEXT: or a2, a0, a2 +; RV32I-NEXT: or a3, a1, a3 +; RV32I-NEXT: slli a0, a0, 2 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: and a1, a1, a4 +; RV32I-NEXT: and a0, a0, a4 +; RV32I-NEXT: srli a4, a3, 2 +; RV32I-NEXT: srli a5, a2, 2 ; RV32I-NEXT: and a5, a5, a7 ; RV32I-NEXT: and a4, a4, a7 -; RV32I-NEXT: or a1, a4, a1 -; RV32I-NEXT: or a0, a5, a0 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a1, a1, a3 +; RV32I-NEXT: or a3, a4, a3 +; RV32I-NEXT: or a2, a5, a2 +; RV32I-NEXT: or a0, a2, a0 +; RV32I-NEXT: or a1, a3, a1 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: gorc2b_i64: ; RV32ZBP: # %bb.0: -; RV32ZBP-NEXT: orc2.n a0, a0 +; RV32ZBP-NEXT: srli a2, a1, 2 +; RV32ZBP-NEXT: srli a3, a0, 2 +; RV32ZBP-NEXT: lui a4, 209715 +; RV32ZBP-NEXT: addi a4, a4, 819 +; RV32ZBP-NEXT: and a3, a3, a4 +; RV32ZBP-NEXT: or a3, a3, a0 +; RV32ZBP-NEXT: or a2, a2, a1 ; RV32ZBP-NEXT: orc2.n a1, a1 +; RV32ZBP-NEXT: orc2.n a0, a0 +; RV32ZBP-NEXT: slli a2, a2, 2 +; RV32ZBP-NEXT: slli a3, a3, 2 +; RV32ZBP-NEXT: lui a5, 838861 +; RV32ZBP-NEXT: addi a5, a5, -820 +; RV32ZBP-NEXT: and a3, a3, a5 +; RV32ZBP-NEXT: and a2, a2, a5 +; RV32ZBP-NEXT: srli a5, a0, 2 +; RV32ZBP-NEXT: srli a6, a1, 2 +; RV32ZBP-NEXT: and a6, a6, a4 +; RV32ZBP-NEXT: and a4, a5, a4 +; RV32ZBP-NEXT: or a0, a4, a0 +; RV32ZBP-NEXT: or a1, a6, a1 +; RV32ZBP-NEXT: or a1, a1, a2 +; RV32ZBP-NEXT: or a0, a0, a3 ; RV32ZBP-NEXT: ret %and1 = shl i64 %a, 2 %shl1 = and i64 %and1, -3689348814741910324 @@ -2680,21 +2713,18 @@ define i32 @bswap_rotr_i32(i32 %a) { ; RV32I-LABEL: bswap_rotr_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a1, a0, 8 -; RV32I-NEXT: lui a2, 16 -; RV32I-NEXT: addi a2, a2, -256 +; RV32I-NEXT: slli a1, a0, 8 +; RV32I-NEXT: lui a2, 4080 ; RV32I-NEXT: and a1, a1, a2 +; RV32I-NEXT: slli a2, a0, 24 +; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: srli a2, a0, 24 -; RV32I-NEXT: or a1, a1, a2 -; RV32I-NEXT: slli a2, a0, 8 -; RV32I-NEXT: lui a3, 4080 -; RV32I-NEXT: and a2, a2, a3 -; RV32I-NEXT: slli a0, a0, 24 -; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: slli a1, a0, 16 ; RV32I-NEXT: srli a0, a0, 16 -; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: slli a0, a0, 8 +; RV32I-NEXT: or a0, a0, a2 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srli a1, a1, 16 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: bswap_rotr_i32: @@ -2709,21 +2739,18 @@ define i32 @bswap_rotl_i32(i32 %a) { ; RV32I-LABEL: bswap_rotl_i32: ; RV32I: # %bb.0: -; RV32I-NEXT: srli a1, a0, 8 -; RV32I-NEXT: lui a2, 16 -; RV32I-NEXT: addi a2, a2, -256 -; RV32I-NEXT: and a1, a1, a2 -; RV32I-NEXT: srli a2, a0, 24 -; RV32I-NEXT: or a1, a1, a2 +; RV32I-NEXT: srli a1, a0, 24 +; RV32I-NEXT: srli a2, a0, 16 +; RV32I-NEXT: slli a2, a2, 8 +; RV32I-NEXT: or a1, a2, a1 ; RV32I-NEXT: slli a2, a0, 8 ; RV32I-NEXT: lui a3, 4080 ; RV32I-NEXT: and a2, a2, a3 ; RV32I-NEXT: slli a0, a0, 24 ; RV32I-NEXT: or a0, a0, a2 -; RV32I-NEXT: or a0, a0, a1 -; RV32I-NEXT: srli a1, a0, 16 -; RV32I-NEXT: slli a0, a0, 16 -; RV32I-NEXT: or a0, a0, a1 +; RV32I-NEXT: srli a0, a0, 16 +; RV32I-NEXT: slli a1, a1, 16 +; RV32I-NEXT: or a0, a1, a0 ; RV32I-NEXT: ret ; ; RV32ZBP-LABEL: bswap_rotl_i32: diff --git a/llvm/test/CodeGen/RISCV/rv64zbp.ll b/llvm/test/CodeGen/RISCV/rv64zbp.ll --- a/llvm/test/CodeGen/RISCV/rv64zbp.ll +++ b/llvm/test/CodeGen/RISCV/rv64zbp.ll @@ -942,18 +942,31 @@ ; RV64I-NEXT: addiw a4, a4, 819 ; RV64I-NEXT: and a3, a3, a4 ; RV64I-NEXT: or a0, a3, a0 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slliw a1, a0, 2 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 2 +; RV64I-NEXT: or a1, a0, a1 +; RV64I-NEXT: slliw a0, a0, 2 +; RV64I-NEXT: and a0, a0, a2 +; RV64I-NEXT: srli a2, a1, 2 ; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: or a1, a2, a1 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV64ZBP-LABEL: gorc2b_i32: ; RV64ZBP: # %bb.0: -; RV64ZBP-NEXT: gorciw a0, a0, 2 +; RV64ZBP-NEXT: srliw a1, a0, 2 +; RV64ZBP-NEXT: or a1, a1, a0 +; RV64ZBP-NEXT: orc2.n a0, a0 +; RV64ZBP-NEXT: slli a1, a1, 2 +; RV64ZBP-NEXT: lui a2, 838861 +; RV64ZBP-NEXT: addiw a2, a2, -820 +; RV64ZBP-NEXT: and a1, a1, a2 +; RV64ZBP-NEXT: srli a2, a0, 2 +; RV64ZBP-NEXT: lui a3, 209715 +; RV64ZBP-NEXT: addiw a3, a3, 819 +; RV64ZBP-NEXT: and a2, a2, a3 +; RV64ZBP-NEXT: or a0, a2, a0 +; RV64ZBP-NEXT: or a0, a0, a1 +; RV64ZBP-NEXT: sext.w a0, a0 ; RV64ZBP-NEXT: ret %and1 = shl i32 %a, 2 %shl1 = and i32 %and1, -858993460 @@ -983,18 +996,30 @@ ; RV64I-NEXT: srli a4, a0, 2 ; RV64I-NEXT: and a4, a4, a2 ; RV64I-NEXT: or a0, a4, a0 -; RV64I-NEXT: or a0, a0, a3 -; RV64I-NEXT: slli a3, a0, 2 -; RV64I-NEXT: and a1, a3, a1 -; RV64I-NEXT: srli a3, a0, 2 -; RV64I-NEXT: and a2, a3, a2 -; RV64I-NEXT: or a0, a2, a0 -; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: or a3, a0, a3 +; RV64I-NEXT: slli a0, a0, 2 +; RV64I-NEXT: and a0, a0, a1 +; RV64I-NEXT: srli a1, a3, 2 +; RV64I-NEXT: and a1, a1, a2 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV64ZBP-LABEL: gorc2b_i64: ; RV64ZBP: # %bb.0: +; RV64ZBP-NEXT: srli a1, a0, 2 +; RV64ZBP-NEXT: or a1, a1, a0 ; RV64ZBP-NEXT: orc2.n a0, a0 +; RV64ZBP-NEXT: lui a2, %hi(.LCPI26_0) +; RV64ZBP-NEXT: ld a2, %lo(.LCPI26_0)(a2) +; RV64ZBP-NEXT: lui a3, %hi(.LCPI26_1) +; RV64ZBP-NEXT: ld a3, %lo(.LCPI26_1)(a3) +; RV64ZBP-NEXT: slli a1, a1, 2 +; RV64ZBP-NEXT: and a1, a1, a2 +; RV64ZBP-NEXT: srli a2, a0, 2 +; RV64ZBP-NEXT: and a2, a2, a3 +; RV64ZBP-NEXT: or a0, a2, a0 +; RV64ZBP-NEXT: or a0, a0, a1 ; RV64ZBP-NEXT: ret %and1 = shl i64 %a, 2 %shl1 = and i64 %and1, -3689348814741910324 @@ -2801,37 +2826,35 @@ define i64 @bswap_rotr_i64(i64 %a) { ; RV64I-LABEL: bswap_rotr_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: lui a2, 4080 -; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 8 -; RV64I-NEXT: li a3, 255 -; RV64I-NEXT: slli a4, a3, 24 -; RV64I-NEXT: and a2, a2, a4 +; RV64I-NEXT: slli a1, a0, 24 +; RV64I-NEXT: li a2, 255 +; RV64I-NEXT: slli a3, a2, 40 +; RV64I-NEXT: and a1, a1, a3 +; RV64I-NEXT: srliw a3, a0, 24 +; RV64I-NEXT: slli a3, a3, 32 +; RV64I-NEXT: or a1, a1, a3 +; RV64I-NEXT: slli a3, a0, 40 +; RV64I-NEXT: slli a2, a2, 48 +; RV64I-NEXT: and a2, a3, a2 +; RV64I-NEXT: slli a3, a0, 56 +; RV64I-NEXT: or a2, a3, a2 ; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: srli a2, a0, 40 -; RV64I-NEXT: lui a4, 16 -; RV64I-NEXT: addiw a4, a4, -256 -; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: srli a4, a0, 56 -; RV64I-NEXT: or a2, a2, a4 -; RV64I-NEXT: or a1, a1, a2 -; RV64I-NEXT: slli a2, a0, 24 -; RV64I-NEXT: slli a4, a3, 40 -; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: srliw a4, a0, 24 -; RV64I-NEXT: slli a4, a4, 32 -; RV64I-NEXT: or a2, a2, a4 -; RV64I-NEXT: slli a4, a0, 40 -; RV64I-NEXT: slli a3, a3, 48 -; RV64I-NEXT: and a3, a4, a3 -; RV64I-NEXT: slli a0, a0, 56 +; RV64I-NEXT: lui a3, 16 +; RV64I-NEXT: addiw a3, a3, -256 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: srli a3, a0, 56 +; RV64I-NEXT: or a2, a2, a3 +; RV64I-NEXT: srli a3, a0, 24 +; RV64I-NEXT: lui a4, 4080 +; RV64I-NEXT: and a3, a3, a4 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: slli a0, a0, 24 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: slli a1, a0, 32 -; RV64I-NEXT: srli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV64ZBP-LABEL: bswap_rotr_i64: @@ -2846,22 +2869,21 @@ define i64 @bswap_rotl_i64(i64 %a) { ; RV64I-LABEL: bswap_rotl_i64: ; RV64I: # %bb.0: -; RV64I-NEXT: srli a1, a0, 24 -; RV64I-NEXT: lui a2, 4080 +; RV64I-NEXT: srli a1, a0, 40 +; RV64I-NEXT: lui a2, 16 +; RV64I-NEXT: addiw a2, a2, -256 ; RV64I-NEXT: and a1, a1, a2 -; RV64I-NEXT: srli a2, a0, 8 -; RV64I-NEXT: li a3, 255 -; RV64I-NEXT: slli a4, a3, 24 -; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: or a1, a2, a1 -; RV64I-NEXT: srli a2, a0, 40 -; RV64I-NEXT: lui a4, 16 -; RV64I-NEXT: addiw a4, a4, -256 -; RV64I-NEXT: and a2, a2, a4 -; RV64I-NEXT: srli a4, a0, 56 -; RV64I-NEXT: or a2, a2, a4 +; RV64I-NEXT: srli a2, a0, 56 ; RV64I-NEXT: or a1, a1, a2 +; RV64I-NEXT: srli a2, a0, 24 +; RV64I-NEXT: lui a3, 4080 +; RV64I-NEXT: and a2, a2, a3 +; RV64I-NEXT: srli a3, a0, 32 +; RV64I-NEXT: slli a3, a3, 24 +; RV64I-NEXT: or a2, a3, a2 +; RV64I-NEXT: or a1, a2, a1 ; RV64I-NEXT: slli a2, a0, 24 +; RV64I-NEXT: li a3, 255 ; RV64I-NEXT: slli a4, a3, 40 ; RV64I-NEXT: and a2, a2, a4 ; RV64I-NEXT: srliw a4, a0, 24 @@ -2873,10 +2895,9 @@ ; RV64I-NEXT: slli a0, a0, 56 ; RV64I-NEXT: or a0, a0, a3 ; RV64I-NEXT: or a0, a0, a2 -; RV64I-NEXT: or a0, a0, a1 -; RV64I-NEXT: srli a1, a0, 32 -; RV64I-NEXT: slli a0, a0, 32 -; RV64I-NEXT: or a0, a0, a1 +; RV64I-NEXT: srli a0, a0, 32 +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: or a0, a1, a0 ; RV64I-NEXT: ret ; ; RV64ZBP-LABEL: bswap_rotl_i64: diff --git a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/RISCV/srem-seteq-illegal-types.ll @@ -303,7 +303,7 @@ ; RV32-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32-NEXT: mv s0, a0 ; RV32-NEXT: lw a0, 4(a0) -; RV32-NEXT: lbu a1, 12(s0) +; RV32-NEXT: lb a1, 12(s0) ; RV32-NEXT: lw a2, 8(s0) ; RV32-NEXT: andi a3, a0, 1 ; RV32-NEXT: neg s1, a3 @@ -385,17 +385,15 @@ ; RV64-NEXT: lwu a1, 8(s0) ; RV64-NEXT: slli a0, a0, 32 ; RV64-NEXT: or a0, a1, a0 -; RV64-NEXT: slli a0, a0, 24 -; RV64-NEXT: srli a0, a0, 24 -; RV64-NEXT: ld a1, 0(s0) -; RV64-NEXT: slli a2, a0, 29 -; RV64-NEXT: srai s1, a2, 31 -; RV64-NEXT: slli a0, a0, 31 -; RV64-NEXT: srli a2, a1, 33 -; RV64-NEXT: or a0, a2, a0 +; RV64-NEXT: ld a2, 0(s0) +; RV64-NEXT: slli a0, a0, 29 +; RV64-NEXT: srai s1, a0, 31 +; RV64-NEXT: slli a0, a1, 31 +; RV64-NEXT: srli a1, a2, 33 +; RV64-NEXT: or a0, a1, a0 ; RV64-NEXT: slli a0, a0, 31 ; RV64-NEXT: srai a0, a0, 31 -; RV64-NEXT: slli a1, a1, 31 +; RV64-NEXT: slli a1, a2, 31 ; RV64-NEXT: srai s2, a1, 31 ; RV64-NEXT: li a1, 7 ; RV64-NEXT: call __moddi3@plt @@ -420,21 +418,20 @@ ; RV64-NEXT: addi a2, s3, -1 ; RV64-NEXT: snez a2, a2 ; RV64-NEXT: neg a0, a0 -; RV64-NEXT: neg a2, a2 -; RV64-NEXT: neg a3, a1 -; RV64-NEXT: slli a3, a3, 29 -; RV64-NEXT: srli a3, a3, 61 -; RV64-NEXT: sb a3, 12(s0) +; RV64-NEXT: neg a3, a2 +; RV64-NEXT: neg a4, a1 +; RV64-NEXT: slli a4, a4, 29 +; RV64-NEXT: srli a4, a4, 61 +; RV64-NEXT: sb a4, 12(s0) ; RV64-NEXT: slliw a1, a1, 2 -; RV64-NEXT: li a3, -1 -; RV64-NEXT: srli a3, a3, 31 -; RV64-NEXT: and a2, a2, a3 -; RV64-NEXT: srli a4, a2, 31 -; RV64-NEXT: subw a1, a4, a1 +; RV64-NEXT: slli a3, a3, 31 +; RV64-NEXT: srli a3, a3, 62 +; RV64-NEXT: subw a1, a3, a1 ; RV64-NEXT: sw a1, 8(s0) -; RV64-NEXT: and a0, a0, a3 +; RV64-NEXT: slli a0, a0, 31 +; RV64-NEXT: srli a0, a0, 31 ; RV64-NEXT: slli a1, a2, 33 -; RV64-NEXT: or a0, a0, a1 +; RV64-NEXT: sub a0, a0, a1 ; RV64-NEXT: sd a0, 0(s0) ; RV64-NEXT: ld ra, 40(sp) # 8-byte Folded Reload ; RV64-NEXT: ld s0, 32(sp) # 8-byte Folded Reload @@ -457,7 +454,7 @@ ; RV32M-NEXT: sw s6, 0(sp) # 4-byte Folded Spill ; RV32M-NEXT: mv s0, a0 ; RV32M-NEXT: lw a0, 4(a0) -; RV32M-NEXT: lbu a1, 12(s0) +; RV32M-NEXT: lb a1, 12(s0) ; RV32M-NEXT: lw a2, 8(s0) ; RV32M-NEXT: andi a3, a0, 1 ; RV32M-NEXT: neg s1, a3 @@ -532,19 +529,17 @@ ; RV64M-NEXT: lwu a2, 8(a0) ; RV64M-NEXT: slli a1, a1, 32 ; RV64M-NEXT: or a1, a2, a1 -; RV64M-NEXT: slli a1, a1, 24 -; RV64M-NEXT: srli a1, a1, 24 -; RV64M-NEXT: ld a2, 0(a0) -; RV64M-NEXT: slli a3, a1, 29 -; RV64M-NEXT: srai a3, a3, 31 -; RV64M-NEXT: slli a1, a1, 31 -; RV64M-NEXT: srli a4, a2, 33 +; RV64M-NEXT: ld a3, 0(a0) +; RV64M-NEXT: slli a1, a1, 29 +; RV64M-NEXT: srai a1, a1, 31 +; RV64M-NEXT: slli a2, a2, 31 +; RV64M-NEXT: srli a4, a3, 33 ; RV64M-NEXT: lui a5, %hi(.LCPI3_0) ; RV64M-NEXT: ld a5, %lo(.LCPI3_0)(a5) -; RV64M-NEXT: or a1, a4, a1 -; RV64M-NEXT: slli a1, a1, 31 -; RV64M-NEXT: srai a1, a1, 31 -; RV64M-NEXT: mulh a4, a1, a5 +; RV64M-NEXT: or a2, a4, a2 +; RV64M-NEXT: slli a2, a2, 31 +; RV64M-NEXT: srai a2, a2, 31 +; RV64M-NEXT: mulh a4, a2, a5 ; RV64M-NEXT: srli a5, a4, 63 ; RV64M-NEXT: srai a4, a4, 1 ; RV64M-NEXT: add a4, a4, a5 @@ -552,47 +547,46 @@ ; RV64M-NEXT: sub a4, a4, a5 ; RV64M-NEXT: lui a5, %hi(.LCPI3_1) ; RV64M-NEXT: ld a5, %lo(.LCPI3_1)(a5) -; RV64M-NEXT: slli a2, a2, 31 -; RV64M-NEXT: srai a2, a2, 31 -; RV64M-NEXT: add a1, a1, a4 -; RV64M-NEXT: mulh a4, a3, a5 +; RV64M-NEXT: slli a3, a3, 31 +; RV64M-NEXT: srai a3, a3, 31 +; RV64M-NEXT: add a2, a2, a4 +; RV64M-NEXT: mulh a4, a1, a5 ; RV64M-NEXT: srli a5, a4, 63 ; RV64M-NEXT: srai a4, a4, 1 ; RV64M-NEXT: add a4, a4, a5 ; RV64M-NEXT: slli a5, a4, 2 ; RV64M-NEXT: add a4, a5, a4 -; RV64M-NEXT: add a3, a3, a4 -; RV64M-NEXT: addi a3, a3, -2 -; RV64M-NEXT: snez a3, a3 +; RV64M-NEXT: add a1, a1, a4 +; RV64M-NEXT: addi a1, a1, -2 +; RV64M-NEXT: snez a1, a1 ; RV64M-NEXT: lui a4, %hi(.LCPI3_2) ; RV64M-NEXT: ld a4, %lo(.LCPI3_2)(a4) ; RV64M-NEXT: lui a5, %hi(.LCPI3_3) ; RV64M-NEXT: ld a5, %lo(.LCPI3_3)(a5) -; RV64M-NEXT: addi a1, a1, -1 -; RV64M-NEXT: snez a1, a1 -; RV64M-NEXT: mul a2, a2, a4 -; RV64M-NEXT: add a2, a2, a5 -; RV64M-NEXT: slli a4, a2, 63 -; RV64M-NEXT: srli a2, a2, 1 -; RV64M-NEXT: or a2, a2, a4 -; RV64M-NEXT: sltu a2, a5, a2 -; RV64M-NEXT: neg a1, a1 -; RV64M-NEXT: neg a4, a3 -; RV64M-NEXT: neg a2, a2 -; RV64M-NEXT: slli a4, a4, 29 -; RV64M-NEXT: srli a4, a4, 61 -; RV64M-NEXT: sb a4, 12(a0) -; RV64M-NEXT: slliw a3, a3, 2 -; RV64M-NEXT: li a4, -1 -; RV64M-NEXT: srli a4, a4, 31 -; RV64M-NEXT: and a1, a1, a4 -; RV64M-NEXT: srli a5, a1, 31 -; RV64M-NEXT: subw a3, a5, a3 -; RV64M-NEXT: sw a3, 8(a0) -; RV64M-NEXT: slli a1, a1, 33 -; RV64M-NEXT: and a2, a2, a4 -; RV64M-NEXT: or a1, a2, a1 -; RV64M-NEXT: sd a1, 0(a0) +; RV64M-NEXT: addi a2, a2, -1 +; RV64M-NEXT: snez a2, a2 +; RV64M-NEXT: mul a3, a3, a4 +; RV64M-NEXT: add a3, a3, a5 +; RV64M-NEXT: slli a4, a3, 63 +; RV64M-NEXT: srli a3, a3, 1 +; RV64M-NEXT: or a3, a3, a4 +; RV64M-NEXT: sltu a3, a5, a3 +; RV64M-NEXT: neg a4, a2 +; RV64M-NEXT: neg a5, a1 +; RV64M-NEXT: neg a3, a3 +; RV64M-NEXT: slli a2, a2, 33 +; RV64M-NEXT: slli a3, a3, 31 +; RV64M-NEXT: srli a3, a3, 31 +; RV64M-NEXT: sub a2, a3, a2 +; RV64M-NEXT: sd a2, 0(a0) +; RV64M-NEXT: slli a2, a5, 29 +; RV64M-NEXT: srli a2, a2, 61 +; RV64M-NEXT: sb a2, 12(a0) +; RV64M-NEXT: slliw a1, a1, 2 +; RV64M-NEXT: slli a2, a4, 31 +; RV64M-NEXT: srli a2, a2, 62 +; RV64M-NEXT: subw a1, a2, a1 +; RV64M-NEXT: sw a1, 8(a0) ; RV64M-NEXT: ret ; ; RV32MV-LABEL: test_srem_vec: @@ -613,7 +607,7 @@ ; RV32MV-NEXT: slli a2, a0, 31 ; RV32MV-NEXT: srli a3, a1, 1 ; RV32MV-NEXT: or s3, a3, a2 -; RV32MV-NEXT: lbu a2, 12(s2) +; RV32MV-NEXT: lb a2, 12(s2) ; RV32MV-NEXT: slli a3, a0, 30 ; RV32MV-NEXT: srli a3, a3, 31 ; RV32MV-NEXT: neg s4, a3 @@ -706,18 +700,16 @@ ; RV64MV-NEXT: sd s0, 80(sp) # 8-byte Folded Spill ; RV64MV-NEXT: addi s0, sp, 96 ; RV64MV-NEXT: andi sp, sp, -32 -; RV64MV-NEXT: lb a1, 12(a0) -; RV64MV-NEXT: lwu a2, 8(a0) -; RV64MV-NEXT: slli a1, a1, 32 -; RV64MV-NEXT: or a1, a2, a1 +; RV64MV-NEXT: lwu a1, 8(a0) ; RV64MV-NEXT: ld a2, 0(a0) -; RV64MV-NEXT: slli a1, a1, 24 -; RV64MV-NEXT: srli a1, a1, 24 ; RV64MV-NEXT: slli a3, a1, 31 ; RV64MV-NEXT: srli a4, a2, 33 +; RV64MV-NEXT: lb a5, 12(a0) ; RV64MV-NEXT: or a3, a4, a3 ; RV64MV-NEXT: slli a3, a3, 31 ; RV64MV-NEXT: srai a3, a3, 31 +; RV64MV-NEXT: slli a4, a5, 32 +; RV64MV-NEXT: or a1, a1, a4 ; RV64MV-NEXT: lui a4, %hi(.LCPI3_0) ; RV64MV-NEXT: ld a4, %lo(.LCPI3_0)(a4) ; RV64MV-NEXT: slli a1, a1, 29 @@ -769,18 +761,18 @@ ; RV64MV-NEXT: slli a3, a2, 31 ; RV64MV-NEXT: srli a3, a3, 61 ; RV64MV-NEXT: sb a3, 12(a0) -; RV64MV-NEXT: slli a2, a2, 2 -; RV64MV-NEXT: vslidedown.vi v10, v8, 1 -; RV64MV-NEXT: vmv.x.s a3, v10 -; RV64MV-NEXT: and a3, a3, a1 -; RV64MV-NEXT: srli a4, a3, 31 -; RV64MV-NEXT: or a2, a4, a2 -; RV64MV-NEXT: sw a2, 8(a0) -; RV64MV-NEXT: vmv.x.s a2, v8 -; RV64MV-NEXT: and a1, a2, a1 -; RV64MV-NEXT: slli a2, a3, 33 -; RV64MV-NEXT: or a1, a1, a2 +; RV64MV-NEXT: vmv.x.s a3, v8 +; RV64MV-NEXT: and a1, a3, a1 +; RV64MV-NEXT: vslidedown.vi v8, v8, 1 +; RV64MV-NEXT: vmv.x.s a3, v8 +; RV64MV-NEXT: slli a4, a3, 33 +; RV64MV-NEXT: or a1, a1, a4 ; RV64MV-NEXT: sd a1, 0(a0) +; RV64MV-NEXT: slli a1, a2, 2 +; RV64MV-NEXT: slli a2, a3, 31 +; RV64MV-NEXT: srli a2, a2, 62 +; RV64MV-NEXT: or a1, a2, a1 +; RV64MV-NEXT: sw a1, 8(a0) ; RV64MV-NEXT: addi sp, s0, -96 ; RV64MV-NEXT: ld ra, 88(sp) # 8-byte Folded Reload ; RV64MV-NEXT: ld s0, 80(sp) # 8-byte Folded Reload diff --git a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll --- a/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll +++ b/llvm/test/CodeGen/RISCV/vec3-setcc-crash.ll @@ -12,30 +12,27 @@ ; RV32-LABEL: vec3_setcc_crash: ; RV32: # %bb.0: ; RV32-NEXT: lw a0, 0(a0) -; RV32-NEXT: lui a2, 16 -; RV32-NEXT: addi a2, a2, -256 -; RV32-NEXT: and a2, a0, a2 -; RV32-NEXT: slli a3, a2, 16 +; RV32-NEXT: slli a2, a0, 8 +; RV32-NEXT: slli a3, a0, 24 +; RV32-NEXT: slli a4, a0, 16 +; RV32-NEXT: srai a5, a4, 24 ; RV32-NEXT: srai a3, a3, 24 -; RV32-NEXT: slli a4, a0, 24 -; RV32-NEXT: srai a6, a4, 24 -; RV32-NEXT: slli a4, a0, 8 -; RV32-NEXT: mv a5, a0 -; RV32-NEXT: bgtz a6, .LBB0_2 +; RV32-NEXT: bgtz a5, .LBB0_2 ; RV32-NEXT: # %bb.1: ; RV32-NEXT: li a5, 0 +; RV32-NEXT: j .LBB0_3 ; RV32-NEXT: .LBB0_2: -; RV32-NEXT: srai a4, a4, 24 -; RV32-NEXT: andi a5, a5, 255 -; RV32-NEXT: bgtz a3, .LBB0_4 -; RV32-NEXT: # %bb.3: -; RV32-NEXT: li a2, 0 -; RV32-NEXT: j .LBB0_5 -; RV32-NEXT: .LBB0_4: -; RV32-NEXT: srli a2, a2, 8 +; RV32-NEXT: srli a5, a4, 24 +; RV32-NEXT: .LBB0_3: +; RV32-NEXT: srai a4, a2, 24 +; RV32-NEXT: slli a2, a5, 8 +; RV32-NEXT: mv a5, a0 +; RV32-NEXT: bgtz a3, .LBB0_5 +; RV32-NEXT: # %bb.4: +; RV32-NEXT: li a5, 0 ; RV32-NEXT: .LBB0_5: -; RV32-NEXT: slli a2, a2, 8 -; RV32-NEXT: or a2, a5, a2 +; RV32-NEXT: andi a3, a5, 255 +; RV32-NEXT: or a2, a3, a2 ; RV32-NEXT: bgtz a4, .LBB0_7 ; RV32-NEXT: # %bb.6: ; RV32-NEXT: li a0, 0 @@ -50,30 +47,27 @@ ; RV64-LABEL: vec3_setcc_crash: ; RV64: # %bb.0: ; RV64-NEXT: lwu a0, 0(a0) -; RV64-NEXT: lui a2, 16 -; RV64-NEXT: addiw a2, a2, -256 -; RV64-NEXT: and a2, a0, a2 -; RV64-NEXT: slli a3, a2, 48 +; RV64-NEXT: slli a2, a0, 40 +; RV64-NEXT: slli a3, a0, 56 +; RV64-NEXT: slli a4, a0, 48 +; RV64-NEXT: srai a5, a4, 56 ; RV64-NEXT: srai a3, a3, 56 -; RV64-NEXT: slli a4, a0, 56 -; RV64-NEXT: srai a6, a4, 56 -; RV64-NEXT: slli a4, a0, 40 -; RV64-NEXT: mv a5, a0 -; RV64-NEXT: bgtz a6, .LBB0_2 +; RV64-NEXT: bgtz a5, .LBB0_2 ; RV64-NEXT: # %bb.1: ; RV64-NEXT: li a5, 0 +; RV64-NEXT: j .LBB0_3 ; RV64-NEXT: .LBB0_2: -; RV64-NEXT: srai a4, a4, 56 -; RV64-NEXT: andi a5, a5, 255 -; RV64-NEXT: bgtz a3, .LBB0_4 -; RV64-NEXT: # %bb.3: -; RV64-NEXT: li a2, 0 -; RV64-NEXT: j .LBB0_5 -; RV64-NEXT: .LBB0_4: -; RV64-NEXT: srli a2, a2, 8 +; RV64-NEXT: srli a5, a4, 56 +; RV64-NEXT: .LBB0_3: +; RV64-NEXT: srai a4, a2, 56 +; RV64-NEXT: slli a2, a5, 8 +; RV64-NEXT: mv a5, a0 +; RV64-NEXT: bgtz a3, .LBB0_5 +; RV64-NEXT: # %bb.4: +; RV64-NEXT: li a5, 0 ; RV64-NEXT: .LBB0_5: -; RV64-NEXT: slli a2, a2, 8 -; RV64-NEXT: or a2, a5, a2 +; RV64-NEXT: andi a3, a5, 255 +; RV64-NEXT: or a2, a3, a2 ; RV64-NEXT: bgtz a4, .LBB0_7 ; RV64-NEXT: # %bb.6: ; RV64-NEXT: li a0, 0 diff --git a/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll b/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll --- a/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll +++ b/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll @@ -12,7 +12,7 @@ ; CHECK-NEXT: nihh %r1, 4095 ; CHECK-NEXT: stg %r1, 0(%r2) ; CHECK-NEXT: vlgvf %r1, %v24, 2 -; CHECK-NEXT: risbgn %r0, %r0, 0, 129, 62 +; CHECK-NEXT: sllg %r0, %r0, 62 ; CHECK-NEXT: rosbg %r0, %r1, 2, 32, 31 ; CHECK-NEXT: vlgvf %r1, %v24, 3 ; CHECK-NEXT: rosbg %r0, %r1, 33, 63, 0 @@ -77,38 +77,40 @@ ; CHECK-NEXT: .cfi_offset %r14, -48 ; CHECK-NEXT: .cfi_offset %r15, -40 ; CHECK-NEXT: vlgvf %r0, %v26, 3 -; CHECK-NEXT: vlgvf %r4, %v24, 1 -; CHECK-NEXT: vlgvf %r3, %v24, 2 +; CHECK-NEXT: vlgvf %r5, %v24, 2 ; CHECK-NEXT: srlk %r1, %r0, 8 -; CHECK-NEXT: vlgvf %r5, %v24, 0 +; CHECK-NEXT: vlgvf %r3, %v24, 3 ; CHECK-NEXT: sth %r1, 28(%r2) -; CHECK-NEXT: risbgn %r1, %r4, 0, 133, 58 -; CHECK-NEXT: sllg %r5, %r5, 25 +; CHECK-NEXT: vlgvf %r1, %v26, 0 +; CHECK-NEXT: risbgn %r14, %r5, 6, 164, 27 +; CHECK-NEXT: sllg %r4, %r3, 60 ; CHECK-NEXT: stc %r0, 30(%r2) -; CHECK-NEXT: rosbg %r1, %r3, 6, 36, 27 -; CHECK-NEXT: vlgvf %r3, %v24, 3 -; CHECK-NEXT: rosbg %r5, %r4, 39, 63, 58 -; CHECK-NEXT: sllg %r4, %r5, 8 -; CHECK-NEXT: rosbg %r1, %r3, 37, 63, 60 -; CHECK-NEXT: vlgvf %r5, %v26, 1 -; CHECK-NEXT: rosbg %r4, %r1, 56, 63, 8 -; CHECK-NEXT: stg %r4, 0(%r2) -; CHECK-NEXT: vlgvf %r4, %v26, 2 -; CHECK-NEXT: risbgn %r14, %r5, 0, 129, 62 -; CHECK-NEXT: risbgn %r3, %r3, 0, 131, 60 -; CHECK-NEXT: rosbg %r14, %r4, 2, 32, 31 -; CHECK-NEXT: rosbg %r14, %r0, 33, 63, 0 -; CHECK-NEXT: srlg %r0, %r14, 24 -; CHECK-NEXT: st %r0, 24(%r2) -; CHECK-NEXT: vlgvf %r0, %v26, 0 -; CHECK-NEXT: rosbg %r3, %r0, 4, 34, 29 -; CHECK-NEXT: sllg %r0, %r1, 8 -; CHECK-NEXT: rosbg %r3, %r5, 35, 63, 62 -; CHECK-NEXT: rosbg %r0, %r3, 56, 63, 8 -; CHECK-NEXT: stg %r0, 8(%r2) -; CHECK-NEXT: sllg %r0, %r3, 8 -; CHECK-NEXT: rosbg %r0, %r14, 56, 63, 8 +; CHECK-NEXT: rosbg %r14, %r3, 37, 63, 60 +; CHECK-NEXT: sllg %r3, %r14, 8 +; CHECK-NEXT: rosbg %r4, %r1, 4, 34, 29 +; CHECK-NEXT: rosbg %r3, %r4, 56, 63, 8 +; CHECK-NEXT: stg %r3, 8(%r2) +; CHECK-NEXT: vlgvf %r3, %v24, 1 +; CHECK-NEXT: sllg %r4, %r3, 58 +; CHECK-NEXT: rosbg %r4, %r5, 6, 36, 27 +; CHECK-NEXT: vlgvf %r5, %v24, 0 +; CHECK-NEXT: sllg %r5, %r5, 25 +; CHECK-NEXT: rosbg %r5, %r3, 39, 63, 58 +; CHECK-NEXT: sllg %r3, %r5, 8 +; CHECK-NEXT: rosbg %r3, %r4, 56, 63, 8 +; CHECK-NEXT: vlgvf %r4, %v26, 1 +; CHECK-NEXT: stg %r3, 0(%r2) +; CHECK-NEXT: vlgvf %r3, %v26, 2 +; CHECK-NEXT: sllg %r5, %r4, 62 +; CHECK-NEXT: rosbg %r5, %r3, 2, 32, 31 +; CHECK-NEXT: rosbg %r5, %r0, 33, 63, 0 +; CHECK-NEXT: risbgn %r0, %r1, 4, 162, 29 +; CHECK-NEXT: rosbg %r0, %r4, 35, 63, 62 +; CHECK-NEXT: sllg %r0, %r0, 8 +; CHECK-NEXT: rosbg %r0, %r5, 56, 63, 8 ; CHECK-NEXT: stg %r0, 16(%r2) +; CHECK-NEXT: srlg %r0, %r5, 24 +; CHECK-NEXT: st %r0, 24(%r2) ; CHECK-NEXT: lmg %r14, %r15, 112(%r15) ; CHECK-NEXT: br %r14 { @@ -125,16 +127,11 @@ ; CHECK-NEXT: lg %r1, 0(%r2) ; CHECK-NEXT: sllg %r2, %r1, 32 ; CHECK-NEXT: lr %r2, %r0 -; CHECK-NEXT: srlg %r0, %r2, 62 -; CHECK-NEXT: st %r2, 8(%r3) -; CHECK-NEXT: rosbg %r0, %r1, 33, 61, 34 -; CHECK-NEXT: sllg %r1, %r0, 62 -; CHECK-NEXT: rosbg %r1, %r2, 2, 32, 0 -; CHECK-NEXT: srlg %r1, %r1, 32 -; CHECK-NEXT: sllg %r0, %r0, 30 -; CHECK-NEXT: lr %r0, %r1 -; CHECK-NEXT: nihh %r0, 8191 -; CHECK-NEXT: stg %r0, 0(%r3) +; CHECK-NEXT: st %r0, 8(%r3) +; CHECK-NEXT: srlg %r0, %r2, 32 +; CHECK-NEXT: lr %r1, %r0 +; CHECK-NEXT: nihh %r1, 8191 +; CHECK-NEXT: stg %r1, 0(%r3) ; CHECK-NEXT: br %r14 { %tmp = load <3 x i31>, <3 x i31>* %src diff --git a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll --- a/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptosi-sat-vector.ll @@ -1851,32 +1851,33 @@ ; CHECK-NEXT: vpush {d8, d9, d10, d11} ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: mov r8, r0 -; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: vmov r0, s17 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: vmov r0, s19 ; CHECK-NEXT: vldr s20, .LCPI28_0 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: vmov r4, s16 +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movtlt r5, #65534 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movtlt r7, #65534 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: mov r7, r0 -; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vldr s22, .LCPI28_1 ; CHECK-NEXT: vcmp.f32 s19, s20 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: vcmp.f32 s17, s22 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movtlt r6, #65534 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movtlt r5, #65534 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #1 +; CHECK-NEXT: movwgt r7, #65535 +; CHECK-NEXT: movtgt r7, #1 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: mov r4, r1 @@ -1887,8 +1888,8 @@ ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r6, #65535 -; CHECK-NEXT: movtgt r6, #1 +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: it gt @@ -1898,27 +1899,46 @@ ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s19, s22 ; CHECK-NEXT: str.w r0, [r8] ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s19, s22 +; CHECK-NEXT: movlt r6, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r7, #-1 ; CHECK-NEXT: vcmp.f32 s19, s19 +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r6, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s17, s17 ; CHECK-NEXT: itt vs -; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: lsls r0, r6, #22 -; CHECK-NEXT: orr.w r1, r0, r7, lsr #10 -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: str.w r1, [r8, #20] -; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: lsls r0, r5, #22 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s17, s20 -; CHECK-NEXT: lsrs r2, r6, #10 +; CHECK-NEXT: orr.w r0, r0, r6, lsr #10 +; CHECK-NEXT: str.w r0, [r8, #20] +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r7, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt.w r9, #0 ; CHECK-NEXT: vcmp.f32 s17, s22 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: vcmp.f32 s17, s17 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: lsr.w r0, r9, #14 +; CHECK-NEXT: orr.w r1, r0, r7, lsl #18 +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: str.w r1, [r8, #8] +; CHECK-NEXT: bl __aeabi_f2lz +; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: lsrs r2, r5, #10 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movtlt r1, #65534 @@ -1933,60 +1953,40 @@ ; CHECK-NEXT: movlt r4, #0 ; CHECK-NEXT: movtlt r4, #65534 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r4, #65535 ; CHECK-NEXT: movtgt r4, #1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s22 +; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: strb.w r2, [r8, #24] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s17 +; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: ubfx r2, r7, #14, #4 ; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: orr.w r2, r2, r0, lsl #4 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: str.w r2, [r8, #12] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: bfc r4, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: orr.w r2, r4, r0, lsl #18 +; CHECK-NEXT: orr.w r2, r4, r9, lsl #18 ; CHECK-NEXT: str.w r2, [r8, #4] -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s17 -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r9, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: vcmp.f32 s18, s18 +; CHECK-NEXT: lsrs r0, r0, #28 ; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: lsrs r0, r0, #14 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: lsr.w r2, r1, #14 -; CHECK-NEXT: orr.w r0, r0, r1, lsl #18 -; CHECK-NEXT: orr.w r2, r2, r9, lsl #4 -; CHECK-NEXT: str.w r2, [r8, #12] -; CHECK-NEXT: str.w r0, [r8, #8] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: bfc r5, #18, #14 -; CHECK-NEXT: lsr.w r0, r9, #28 -; CHECK-NEXT: orr.w r0, r0, r5, lsl #4 -; CHECK-NEXT: orr.w r0, r0, r7, lsl #22 +; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 +; CHECK-NEXT: orr.w r0, r0, r6, lsl #22 ; CHECK-NEXT: str.w r0, [r8, #16] ; CHECK-NEXT: vpop {d8, d9, d10, d11} ; CHECK-NEXT: add sp, #4 @@ -4883,104 +4883,100 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: .vsave {d8} ; CHECK-NEXT: vpush {d8} -; CHECK-NEXT: vldr s5, .LCPI46_0 +; CHECK-NEXT: vldr s12, .LCPI46_0 ; CHECK-NEXT: vcvtt.f32.f16 s15, s3 -; CHECK-NEXT: vldr s7, .LCPI46_1 -; CHECK-NEXT: vcvtb.f32.f16 s8, s2 -; CHECK-NEXT: vmaxnm.f32 s16, s15, s5 +; CHECK-NEXT: vldr s14, .LCPI46_1 +; CHECK-NEXT: vcvtb.f32.f16 s7, s0 +; CHECK-NEXT: vmaxnm.f32 s16, s15, s12 ; CHECK-NEXT: vcvtb.f32.f16 s4, s1 -; CHECK-NEXT: vcvtt.f32.f16 s12, s1 -; CHECK-NEXT: vcvtb.f32.f16 s1, s0 -; CHECK-NEXT: vminnm.f32 s16, s16, s7 +; CHECK-NEXT: vcvtt.f32.f16 s8, s1 +; CHECK-NEXT: vcvtb.f32.f16 s1, s2 ; CHECK-NEXT: vcvtt.f32.f16 s0, s0 ; CHECK-NEXT: vcvtt.f32.f16 s2, s2 ; CHECK-NEXT: vcvtb.f32.f16 s3, s3 -; CHECK-NEXT: vmaxnm.f32 s6, s4, s5 -; CHECK-NEXT: vmaxnm.f32 s10, s8, s5 -; CHECK-NEXT: vmaxnm.f32 s14, s12, s5 -; CHECK-NEXT: vmaxnm.f32 s9, s1, s5 -; CHECK-NEXT: vmaxnm.f32 s11, s0, s5 -; CHECK-NEXT: vmaxnm.f32 s13, s2, s5 -; CHECK-NEXT: vmaxnm.f32 s5, s3, s5 +; CHECK-NEXT: vmaxnm.f32 s6, s4, s12 +; CHECK-NEXT: vmaxnm.f32 s10, s8, s12 +; CHECK-NEXT: vmaxnm.f32 s5, s1, s12 +; CHECK-NEXT: vmaxnm.f32 s9, s7, s12 +; CHECK-NEXT: vmaxnm.f32 s11, s0, s12 +; CHECK-NEXT: vmaxnm.f32 s13, s2, s12 +; CHECK-NEXT: vminnm.f32 s16, s16, s14 +; CHECK-NEXT: vmaxnm.f32 s12, s3, s12 ; CHECK-NEXT: vcvt.s32.f32 s16, s16 -; CHECK-NEXT: vminnm.f32 s5, s5, s7 -; CHECK-NEXT: vminnm.f32 s13, s13, s7 -; CHECK-NEXT: vcvt.s32.f32 s5, s5 -; CHECK-NEXT: vminnm.f32 s11, s11, s7 +; CHECK-NEXT: vminnm.f32 s12, s12, s14 +; CHECK-NEXT: vminnm.f32 s13, s13, s14 +; CHECK-NEXT: vcvt.s32.f32 s12, s12 +; CHECK-NEXT: vminnm.f32 s9, s9, s14 ; CHECK-NEXT: vcvt.s32.f32 s13, s13 -; CHECK-NEXT: vminnm.f32 s9, s9, s7 -; CHECK-NEXT: vcmp.f32 s15, s15 -; CHECK-NEXT: vminnm.f32 s10, s10, s7 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vminnm.f32 s11, s11, s14 ; CHECK-NEXT: vcvt.s32.f32 s11, s11 -; CHECK-NEXT: vcmp.f32 s3, s3 -; CHECK-NEXT: vminnm.f32 s14, s14, s7 +; CHECK-NEXT: vminnm.f32 s5, s5, s14 +; CHECK-NEXT: vcvt.s32.f32 s9, s9 +; CHECK-NEXT: vminnm.f32 s10, s10, s14 +; CHECK-NEXT: vcmp.f32 s15, s15 +; CHECK-NEXT: vminnm.f32 s6, s6, s14 ; CHECK-NEXT: vmov r1, s16 -; CHECK-NEXT: vminnm.f32 s6, s6, s7 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 ; CHECK-NEXT: lsrs r2, r1, #11 +; CHECK-NEXT: vcmp.f32 s3, s3 ; CHECK-NEXT: strb r2, [r0, #18] -; CHECK-NEXT: vmov r2, s5 +; CHECK-NEXT: vmov r3, s12 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r2, #0 -; CHECK-NEXT: vcvt.s32.f32 s9, s9 -; CHECK-NEXT: bfc r2, #19, #13 +; CHECK-NEXT: movvs r3, #0 +; CHECK-NEXT: ubfx r2, r3, #14, #5 +; CHECK-NEXT: vcvt.s32.f32 s5, s5 +; CHECK-NEXT: orr.w r1, r2, r1, lsl #5 ; CHECK-NEXT: vcmp.f32 s2, s2 -; CHECK-NEXT: vmov r12, s13 -; CHECK-NEXT: lsrs r3, r2, #14 -; CHECK-NEXT: orr.w r1, r3, r1, lsl #5 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: strh r1, [r0, #16] -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r12, #0 -; CHECK-NEXT: vcvt.s32.f32 s10, s10 -; CHECK-NEXT: bfc r12, #19, #13 -; CHECK-NEXT: vcvt.s32.f32 s14, s14 -; CHECK-NEXT: lsr.w r3, r12, #1 -; CHECK-NEXT: vcmp.f32 s0, s0 -; CHECK-NEXT: vmov lr, s11 -; CHECK-NEXT: orr.w r2, r3, r2, lsl #18 +; CHECK-NEXT: vmov lr, s13 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str r2, [r0, #12] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs.w lr, #0 -; CHECK-NEXT: vcmp.f32 s1, s1 +; CHECK-NEXT: ubfx r1, lr, #1, #18 +; CHECK-NEXT: vcmp.f32 s0, s0 +; CHECK-NEXT: orr.w r1, r1, r3, lsl #18 +; CHECK-NEXT: vcvt.s32.f32 s10, s10 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vmov r12, s11 +; CHECK-NEXT: str r1, [r0, #12] ; CHECK-NEXT: vmov r3, s9 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs.w r12, #0 +; CHECK-NEXT: vcmp.f32 s7, s7 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 -; CHECK-NEXT: bfc lr, #19, #13 ; CHECK-NEXT: bfc r3, #19, #13 -; CHECK-NEXT: vcmp.f32 s12, s12 -; CHECK-NEXT: orr.w r3, r3, lr, lsl #19 ; CHECK-NEXT: vcvt.s32.f32 s6, s6 +; CHECK-NEXT: orr.w r3, r3, r12, lsl #19 ; CHECK-NEXT: str r3, [r0] +; CHECK-NEXT: vcmp.f32 s1, s1 +; CHECK-NEXT: vmov r3, s5 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r3, s14 -; CHECK-NEXT: vmov r1, s10 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r3, #0 ; CHECK-NEXT: vcmp.f32 s8, s8 ; CHECK-NEXT: bfc r3, #19, #13 +; CHECK-NEXT: vmov r1, s10 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: bfc r1, #19, #13 -; CHECK-NEXT: lsrs r2, r3, #7 +; CHECK-NEXT: ubfx r2, r1, #7, #12 ; CHECK-NEXT: vcmp.f32 s4, s4 -; CHECK-NEXT: orr.w r1, r2, r1, lsl #12 +; CHECK-NEXT: orr.w r2, r2, r3, lsl #12 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: orr.w r1, r1, r12, lsl #31 -; CHECK-NEXT: str r1, [r0, #8] -; CHECK-NEXT: vmov r1, s6 -; CHECK-NEXT: lsr.w r2, lr, #13 +; CHECK-NEXT: orr.w r2, r2, lr, lsl #31 +; CHECK-NEXT: str r2, [r0, #8] +; CHECK-NEXT: vmov r2, s6 +; CHECK-NEXT: ubfx r3, r12, #13, #6 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: bfc r1, #19, #13 -; CHECK-NEXT: orr.w r1, r2, r1, lsl #6 -; CHECK-NEXT: orr.w r1, r1, r3, lsl #25 +; CHECK-NEXT: movvs r2, #0 +; CHECK-NEXT: bfc r2, #19, #13 +; CHECK-NEXT: orr.w r2, r3, r2, lsl #6 +; CHECK-NEXT: orr.w r1, r2, r1, lsl #25 ; CHECK-NEXT: str r1, [r0, #4] ; CHECK-NEXT: vpop {d8} ; CHECK-NEXT: pop {r7, pc} @@ -5033,42 +5029,42 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14, d15} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r10, r0 -; CHECK-NEXT: vcvtt.f32.f16 s30, s19 -; CHECK-NEXT: vmov r0, s30 +; CHECK-NEXT: mov r11, r0 +; CHECK-NEXT: vcvtt.f32.f16 s28, s19 +; CHECK-NEXT: vmov r0, s28 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcvtb.f32.f16 s26, s18 -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vmov r0, s26 ; CHECK-NEXT: vldr s22, .LCPI48_1 ; CHECK-NEXT: vcvtb.f32.f16 s24, s16 -; CHECK-NEXT: vcvtt.f32.f16 s28, s17 -; CHECK-NEXT: vcmp.f32 s30, s22 -; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: vcvtt.f32.f16 s18, s18 +; CHECK-NEXT: vcmp.f32 s28, s22 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vmov r7, s24 +; CHECK-NEXT: vmov r6, s24 ; CHECK-NEXT: vldr s20, .LCPI48_0 -; CHECK-NEXT: vmov r8, s28 +; CHECK-NEXT: vmov r5, s18 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movtlt r6, #65534 +; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: movtlt r4, #65534 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s26, s22 -; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s20 +; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r6, #65535 -; CHECK-NEXT: movtgt r6, #1 +; CHECK-NEXT: movwgt r4, #65535 +; CHECK-NEXT: movtgt r4, #1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 @@ -5076,11 +5072,11 @@ ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 -; CHECK-NEXT: str.w r0, [r10, #25] -; CHECK-NEXT: mov r0, r7 +; CHECK-NEXT: str.w r0, [r11, #25] +; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s24, s22 -; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: it lt @@ -5090,99 +5086,97 @@ ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s30, s22 +; CHECK-NEXT: vcmp.f32 s28, s22 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r0, [r10] +; CHECK-NEXT: str.w r0, [r11] ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vcmp.f32 s30, s20 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: vcmp.f32 s28, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r4, #-1 -; CHECK-NEXT: vcmp.f32 s30, s30 +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: vcmp.f32 s28, s28 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: str r4, [sp, #4] @ 4-byte Spill +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: str r7, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: lsls r0, r6, #22 -; CHECK-NEXT: orr.w r7, r0, r4, lsr #10 -; CHECK-NEXT: mov r0, r8 +; CHECK-NEXT: movvs r4, #0 +; CHECK-NEXT: lsls r0, r4, #22 +; CHECK-NEXT: orr.w r7, r0, r7, lsr #10 +; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: mov r6, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: movtlt r4, #65534 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movtlt r6, #65534 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s22 -; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r4, #65535 -; CHECK-NEXT: movtgt r4, #1 +; CHECK-NEXT: movwgt r6, #65535 +; CHECK-NEXT: movtgt r6, #1 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: str.w r7, [r11, #45] +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r6, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r7, [r10, #45] ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: vcvtt.f32.f16 s18, s18 +; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: vcmp.f32 s28, s28 +; CHECK-NEXT: movgt.w r5, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r1, #0 -; CHECK-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: lsls r0, r4, #22 -; CHECK-NEXT: orr.w r0, r0, r1, lsr #10 -; CHECK-NEXT: str.w r0, [r10, #20] +; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: lsrs r0, r5, #14 +; CHECK-NEXT: orr.w r0, r0, r6, lsl #18 +; CHECK-NEXT: vcvtt.f32.f16 s18, s17 +; CHECK-NEXT: str.w r0, [r11, #33] ; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: lsrs r1, r6, #10 -; CHECK-NEXT: strb.w r1, [r10, #49] ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s18, s22 -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: movvs r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: movtlt r5, #65534 -; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: movwlt r9, #0 +; CHECK-NEXT: movtlt r9, #65534 +; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #1 -; CHECK-NEXT: vcmp.f32 s26, s26 +; CHECK-NEXT: movwgt r9, #65535 +; CHECK-NEXT: movtgt r9, #1 +; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vcvtt.f32.f16 s16, s16 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r5, #0 -; CHECK-NEXT: bfc r5, #18, #14 -; CHECK-NEXT: mov r7, r1 -; CHECK-NEXT: orr.w r0, r5, r9, lsl #18 -; CHECK-NEXT: str.w r0, [r10, #29] +; CHECK-NEXT: movvs.w r9, #0 +; CHECK-NEXT: lsl.w r0, r9, #22 +; CHECK-NEXT: orr.w r0, r0, r1, lsr #10 +; CHECK-NEXT: str.w r0, [r11, #20] ; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: lsrs r1, r4, #10 -; CHECK-NEXT: strb.w r1, [r10, #24] ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: mov r8, r0 @@ -5195,105 +5189,109 @@ ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r8, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s22 +; CHECK-NEXT: mov r10, r1 +; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs.w r8, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt lt -; CHECK-NEXT: movwlt r11, #0 -; CHECK-NEXT: movtlt r11, #65534 -; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: movwlt r10, #0 +; CHECK-NEXT: movtlt r10, #65534 +; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: lsr.w r0, r8, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s24 +; CHECK-NEXT: vcmp.f32 s16, s16 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r11, #65535 -; CHECK-NEXT: movtgt r11, #1 +; CHECK-NEXT: movwgt r10, #65535 +; CHECK-NEXT: movtgt r10, #1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs.w r11, #0 -; CHECK-NEXT: vcvtb.f32.f16 s24, s19 -; CHECK-NEXT: bfc r11, #18, #14 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: orr.w r0, r11, r8, lsl #18 -; CHECK-NEXT: str.w r0, [r10, #4] -; CHECK-NEXT: vmov r0, s24 +; CHECK-NEXT: movvs.w r10, #0 +; CHECK-NEXT: orr.w r0, r0, r10, lsl #18 +; CHECK-NEXT: str.w r0, [r11, #8] +; CHECK-NEXT: lsrs r0, r4, #10 +; CHECK-NEXT: vcvtb.f32.f16 s16, s19 +; CHECK-NEXT: strb.w r0, [r11, #49] +; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: bl __aeabi_f2lz -; CHECK-NEXT: vcmp.f32 s24, s22 -; CHECK-NEXT: mov r5, r0 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: ubfx r0, r6, #14, #4 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r5, #-1 +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: vcvtb.f32.f16 s18, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s22 ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r5, #0 +; CHECK-NEXT: movvs r7, #0 +; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 +; CHECK-NEXT: str.w r0, [r11, #37] +; CHECK-NEXT: vcmp.f32 s26, s22 +; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: movtlt r7, #65534 -; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: movtlt r0, #65534 +; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: mov r4, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r7, #65535 -; CHECK-NEXT: movtgt r7, #1 +; CHECK-NEXT: movwgt r0, #65535 +; CHECK-NEXT: movtgt r0, #1 +; CHECK-NEXT: vcmp.f32 s26, s26 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r7, #0 -; CHECK-NEXT: vcvtb.f32.f16 s18, s17 -; CHECK-NEXT: bfc r7, #18, #14 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: lsrs r0, r7, #14 -; CHECK-NEXT: orr.w r0, r0, r5, lsl #4 -; CHECK-NEXT: str.w r0, [r10, #37] -; CHECK-NEXT: lsr.w r0, r9, #14 -; CHECK-NEXT: orr.w r0, r0, r7, lsl #18 -; CHECK-NEXT: str.w r0, [r10, #33] +; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: bfc r0, #18, #14 +; CHECK-NEXT: orr.w r0, r0, r5, lsl #18 +; CHECK-NEXT: str.w r0, [r11, #29] +; CHECK-NEXT: lsr.w r0, r9, #10 +; CHECK-NEXT: strb.w r0, [r11, #24] ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: bl __aeabi_f2lz ; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: ubfx r2, r10, #14, #4 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: vcmp.f32 s18, s18 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r0, #0 +; CHECK-NEXT: orr.w r2, r2, r0, lsl #4 +; CHECK-NEXT: str.w r2, [r11, #12] +; CHECK-NEXT: vcmp.f32 s24, s22 +; CHECK-NEXT: ldr r2, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itt lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: movtlt r6, #65534 -; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: movlt r2, #0 +; CHECK-NEXT: movtlt r2, #65534 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s24, s24 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r6, #65535 -; CHECK-NEXT: movtgt r6, #1 -; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: movwgt r2, #65535 +; CHECK-NEXT: movtgt r2, #1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it vs -; CHECK-NEXT: movvs r6, #0 -; CHECK-NEXT: bfc r6, #18, #14 ; CHECK-NEXT: vcmp.f32 s18, s22 +; CHECK-NEXT: it vs +; CHECK-NEXT: movvs r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: lsr.w r2, r6, #14 -; CHECK-NEXT: orr.w r2, r2, r0, lsl #4 -; CHECK-NEXT: str.w r2, [r10, #12] ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: movtlt r1, #65534 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, s22 +; CHECK-NEXT: vcmp.f32 s16, s22 ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #1 @@ -5301,36 +5299,36 @@ ; CHECK-NEXT: itt lt ; CHECK-NEXT: movlt r4, #0 ; CHECK-NEXT: movtlt r4, #65534 -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: lsr.w r2, r8, #14 +; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: bfc r2, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r4, #65535 ; CHECK-NEXT: movtgt r4, #1 -; CHECK-NEXT: vcmp.f32 s24, s24 -; CHECK-NEXT: orr.w r2, r2, r6, lsl #18 -; CHECK-NEXT: str.w r2, [r10, #8] +; CHECK-NEXT: vcmp.f32 s16, s16 +; CHECK-NEXT: orr.w r2, r2, r8, lsl #18 +; CHECK-NEXT: str.w r2, [r11, #4] ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r4, #0 -; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: bfc r4, #18, #14 -; CHECK-NEXT: lsrs r2, r5, #28 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: lsrs r2, r7, #28 ; CHECK-NEXT: vcmp.f32 s18, s18 -; CHECK-NEXT: lsrs r0, r0, #28 ; CHECK-NEXT: orr.w r2, r2, r4, lsl #4 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: orr.w r2, r2, r3, lsl #22 -; CHECK-NEXT: str.w r2, [r10, #41] +; CHECK-NEXT: str.w r2, [r11, #41] ; CHECK-NEXT: it vs ; CHECK-NEXT: movvs r1, #0 +; CHECK-NEXT: lsrs r0, r0, #28 ; CHECK-NEXT: bfc r1, #18, #14 ; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 ; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-NEXT: orr.w r0, r0, r1, lsl #22 -; CHECK-NEXT: str.w r0, [r10, #16] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} +; CHECK-NEXT: str.w r0, [r11, #16] +; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 2 diff --git a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll --- a/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll +++ b/llvm/test/CodeGen/Thumb2/mve-fptoui-sat-vector.ll @@ -1526,29 +1526,29 @@ ; CHECK-NEXT: mov r8, r0 ; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: vldr s20, .LCPI28_0 -; CHECK-NEXT: vmov r5, s18 +; CHECK-NEXT: vmov r4, s17 ; CHECK-NEXT: vmov r6, s19 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: mov r7, r0 ; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: mov r0, r5 +; CHECK-NEXT: mov r0, r4 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r7, r1 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r5, r1 +; CHECK-NEXT: vcmp.f32 s17, #0 +; CHECK-NEXT: mov r10, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: mov r9, r0 ; CHECK-NEXT: mov r0, r6 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r5, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 +; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #3 +; CHECK-NEXT: movwgt r10, #65535 +; CHECK-NEXT: movtgt r10, #3 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s19, #0 ; CHECK-NEXT: mov r6, r1 @@ -1562,27 +1562,38 @@ ; CHECK-NEXT: movwgt r6, #65535 ; CHECK-NEXT: movtgt r6, #3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r10, r0 ; CHECK-NEXT: vcmp.f32 s19, #0 ; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s19, s20 +; CHECK-NEXT: str.w r7, [r8] +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: lsl.w r0, r6, #22 +; CHECK-NEXT: vcmp.f32 s17, #0 +; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r4, #-1 +; CHECK-NEXT: orr.w r0, r0, r4, lsr #10 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r4, [r8] +; CHECK-NEXT: str.w r0, [r8, #20] ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r10, #0 -; CHECK-NEXT: vcmp.f32 s19, s20 -; CHECK-NEXT: lsls r0, r6, #22 +; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: vcmp.f32 s17, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r10, #-1 -; CHECK-NEXT: orr.w r1, r0, r10, lsr #10 -; CHECK-NEXT: vmov r0, s17 -; CHECK-NEXT: str.w r1, [r8, #20] +; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: lsr.w r0, r9, #14 +; CHECK-NEXT: orr.w r1, r0, r10, lsl #18 +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: str.w r1, [r8, #8] ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s17, #0 +; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: lsrs r2, r6, #10 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, s20 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr @@ -1593,42 +1604,30 @@ ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s17, #0 +; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r7, #65535 -; CHECK-NEXT: movtgt r7, #3 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #3 ; CHECK-NEXT: strb.w r2, [r8, #24] +; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s17, s20 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: bfc r7, #18, #14 +; CHECK-NEXT: ubfx r2, r10, #14, #4 ; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: orr.w r2, r7, r0, lsl #18 -; CHECK-NEXT: lsrs r0, r0, #14 -; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: str.w r2, [r8, #4] -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: lsrs r2, r1, #14 -; CHECK-NEXT: orr.w r0, r0, r1, lsl #18 +; CHECK-NEXT: orr.w r2, r2, r0, lsl #4 +; CHECK-NEXT: lsrs r0, r0, #28 +; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 ; CHECK-NEXT: bfc r5, #18, #14 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: orr.w r2, r2, r9, lsl #4 ; CHECK-NEXT: str.w r2, [r8, #12] -; CHECK-NEXT: str.w r0, [r8, #8] -; CHECK-NEXT: lsr.w r0, r9, #28 -; CHECK-NEXT: orr.w r0, r0, r5, lsl #4 -; CHECK-NEXT: orr.w r0, r0, r10, lsl #22 +; CHECK-NEXT: orr.w r2, r5, r9, lsl #18 +; CHECK-NEXT: str.w r2, [r8, #4] +; CHECK-NEXT: orr.w r0, r0, r4, lsl #22 ; CHECK-NEXT: str.w r0, [r8, #16] ; CHECK-NEXT: vpop {d8, d9, d10} ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, pc} @@ -3906,236 +3905,236 @@ ; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr} ; CHECK-NEXT: .pad #4 ; CHECK-NEXT: sub sp, #4 -; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13, d14} -; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13, d14} -; CHECK-NEXT: .pad #8 -; CHECK-NEXT: sub sp, #8 +; CHECK-NEXT: .vsave {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: vpush {d8, d9, d10, d11, d12, d13} +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: vmov q4, q0 -; CHECK-NEXT: mov r10, r0 +; CHECK-NEXT: mov r4, r0 ; CHECK-NEXT: vcvtb.f32.f16 s24, s18 ; CHECK-NEXT: vmov r0, s24 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcvtt.f32.f16 s28, s19 -; CHECK-NEXT: mov r6, r0 -; CHECK-NEXT: vmov r0, s28 +; CHECK-NEXT: vcvtt.f32.f16 s26, s19 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vmov r0, s26 ; CHECK-NEXT: vcvtb.f32.f16 s22, s16 -; CHECK-NEXT: vcvtt.f32.f16 s26, s17 +; CHECK-NEXT: vcvtt.f32.f16 s18, s18 ; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: str r1, [sp, #8] @ 4-byte Spill ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: mov r7, r1 ; CHECK-NEXT: vmov r5, s22 ; CHECK-NEXT: vldr s20, .LCPI48_0 -; CHECK-NEXT: vmov r8, s26 +; CHECK-NEXT: vmov r8, s18 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: mov r4, r1 +; CHECK-NEXT: vcmp.f32 s26, #0 +; CHECK-NEXT: mov r10, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: mov r6, r0 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 +; CHECK-NEXT: movlt.w r10, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: mov r0, r5 ; CHECK-NEXT: vcmp.f32 s24, s20 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r4, #65535 -; CHECK-NEXT: movtgt r4, #3 +; CHECK-NEXT: movwgt r10, #65535 +; CHECK-NEXT: movtgt r10, #3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r6, #-1 -; CHECK-NEXT: str.w r6, [r10, #25] +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: str.w r7, [r4, #25] ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: mov r11, r1 +; CHECK-NEXT: str r1, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s26, #0 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 -; CHECK-NEXT: vcmp.f32 s28, #0 -; CHECK-NEXT: str.w r0, [r10] +; CHECK-NEXT: str r0, [r4] ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #0 -; CHECK-NEXT: vcmp.f32 s28, s20 -; CHECK-NEXT: lsls r0, r4, #22 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: vcmp.f32 s26, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r9, #-1 -; CHECK-NEXT: orr.w r5, r0, r9, lsr #10 +; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: lsl.w r0, r10, #22 +; CHECK-NEXT: str r6, [sp, #12] @ 4-byte Spill +; CHECK-NEXT: orr.w r6, r0, r6, lsr #10 ; CHECK-NEXT: mov r0, r8 -; CHECK-NEXT: str.w r9, [sp, #4] @ 4-byte Spill ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: mov r6, r1 +; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: mov r5, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, s20 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: movlt r5, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s26, #0 -; CHECK-NEXT: mov r1, r0 +; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r6, #65535 -; CHECK-NEXT: movtgt r6, #3 +; CHECK-NEXT: movwgt r5, #65535 +; CHECK-NEXT: movtgt r5, #3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: str.w r5, [r10, #45] +; CHECK-NEXT: str.w r6, [r4, #45] +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r1, #0 -; CHECK-NEXT: vcmp.f32 s26, s20 -; CHECK-NEXT: lsls r0, r6, #22 +; CHECK-NEXT: movlt r7, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r1, #-1 -; CHECK-NEXT: orr.w r0, r0, r1, lsr #10 -; CHECK-NEXT: vcvtt.f32.f16 s18, s18 -; CHECK-NEXT: str r1, [sp] @ 4-byte Spill -; CHECK-NEXT: lsrs r1, r4, #10 -; CHECK-NEXT: str.w r0, [r10, #20] +; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: lsrs r0, r7, #14 +; CHECK-NEXT: orr.w r0, r0, r5, lsl #18 +; CHECK-NEXT: vcvtt.f32.f16 s18, s17 +; CHECK-NEXT: str.w r0, [r4, #33] ; CHECK-NEXT: vmov r0, s18 -; CHECK-NEXT: strb.w r1, [r10, #49] ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s18, #0 -; CHECK-NEXT: mov r9, r0 +; CHECK-NEXT: mov r9, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: movlt r0, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r9, #-1 +; CHECK-NEXT: movgt.w r0, #-1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: str r0, [sp] @ 4-byte Spill ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 -; CHECK-NEXT: vcmp.f32 s24, s20 -; CHECK-NEXT: vcvtt.f32.f16 s16, s16 +; CHECK-NEXT: movlt.w r9, #0 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov r1, r0 ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r7, #65535 -; CHECK-NEXT: movtgt r7, #3 -; CHECK-NEXT: bfc r7, #18, #14 -; CHECK-NEXT: mov r5, r1 -; CHECK-NEXT: orr.w r0, r7, r9, lsl #18 -; CHECK-NEXT: str.w r0, [r10, #29] +; CHECK-NEXT: movwgt r9, #65535 +; CHECK-NEXT: movtgt r9, #3 +; CHECK-NEXT: lsl.w r0, r9, #22 +; CHECK-NEXT: orr.w r0, r0, r1, lsr #10 +; CHECK-NEXT: vcvtt.f32.f16 s16, s16 +; CHECK-NEXT: str r0, [r4, #20] ; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: lsrs r1, r6, #10 -; CHECK-NEXT: strb.w r1, [r10, #24] ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s16, #0 -; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: mov r11, r1 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt.w r8, #0 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r8, #-1 -; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s20 -; CHECK-NEXT: it lt ; CHECK-NEXT: movlt.w r11, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: mov r8, r0 +; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r11, #65535 ; CHECK-NEXT: movtgt r11, #3 -; CHECK-NEXT: vcvtb.f32.f16 s22, s19 -; CHECK-NEXT: bfc r11, #18, #14 -; CHECK-NEXT: mov r6, r1 -; CHECK-NEXT: orr.w r0, r11, r8, lsl #18 -; CHECK-NEXT: str.w r0, [r10, #4] -; CHECK-NEXT: vmov r0, s22 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt.w r8, #0 +; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: it gt +; CHECK-NEXT: movgt.w r8, #-1 +; CHECK-NEXT: lsr.w r0, r8, #14 +; CHECK-NEXT: vcvtb.f32.f16 s16, s19 +; CHECK-NEXT: orr.w r0, r0, r11, lsl #18 +; CHECK-NEXT: str r0, [r4, #8] +; CHECK-NEXT: lsr.w r0, r10, #10 +; CHECK-NEXT: strb.w r0, [r4, #49] +; CHECK-NEXT: vmov r0, s16 ; CHECK-NEXT: bl __aeabi_f2ulz -; CHECK-NEXT: vcmp.f32 s22, #0 -; CHECK-NEXT: mov r7, r0 +; CHECK-NEXT: mov r6, r0 +; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r7, #0 +; CHECK-NEXT: movlt r6, #0 +; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: ubfx r0, r5, #14, #4 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, #0 ; CHECK-NEXT: it gt -; CHECK-NEXT: movgt.w r7, #-1 +; CHECK-NEXT: movgt.w r6, #-1 +; CHECK-NEXT: orr.w r0, r0, r6, lsl #4 +; CHECK-NEXT: str.w r0, [r4, #37] +; CHECK-NEXT: vcmp.f32 s24, #0 +; CHECK-NEXT: ldr r0, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r5, #0 +; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s24, s20 +; CHECK-NEXT: vcvtb.f32.f16 s18, s17 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r5, #65535 -; CHECK-NEXT: movtgt r5, #3 -; CHECK-NEXT: vcvtb.f32.f16 s18, s17 -; CHECK-NEXT: bfc r5, #18, #14 -; CHECK-NEXT: mov r4, r1 -; CHECK-NEXT: lsrs r0, r5, #14 -; CHECK-NEXT: orr.w r0, r0, r7, lsl #4 -; CHECK-NEXT: str.w r0, [r10, #37] -; CHECK-NEXT: lsr.w r0, r9, #14 -; CHECK-NEXT: orr.w r0, r0, r5, lsl #18 -; CHECK-NEXT: str.w r0, [r10, #33] +; CHECK-NEXT: movwgt r0, #65535 +; CHECK-NEXT: movtgt r0, #3 +; CHECK-NEXT: bfc r0, #18, #14 +; CHECK-NEXT: mov r10, r1 +; CHECK-NEXT: orr.w r0, r0, r7, lsl #18 +; CHECK-NEXT: str.w r0, [r4, #29] +; CHECK-NEXT: lsr.w r0, r9, #10 +; CHECK-NEXT: strb r0, [r4, #24] ; CHECK-NEXT: vmov r0, s18 ; CHECK-NEXT: bl __aeabi_f2ulz ; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: ubfx r2, r11, #14, #4 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r0, #0 +; CHECK-NEXT: vcmp.f32 s18, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: it gt ; CHECK-NEXT: movgt.w r0, #-1 +; CHECK-NEXT: orr.w r2, r2, r0, lsl #4 +; CHECK-NEXT: str r2, [r4, #12] +; CHECK-NEXT: vcmp.f32 s22, #0 +; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: vmrs APSR_nzcv, fpscr +; CHECK-NEXT: vcmp.f32 s22, s20 ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r6, #0 -; CHECK-NEXT: vcmp.f32 s16, s20 +; CHECK-NEXT: movlt r2, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r6, #65535 -; CHECK-NEXT: movtgt r6, #3 -; CHECK-NEXT: bfc r6, #18, #14 ; CHECK-NEXT: vcmp.f32 s18, #0 +; CHECK-NEXT: itt gt +; CHECK-NEXT: movwgt r2, #65535 +; CHECK-NEXT: movtgt r2, #3 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: vcmp.f32 s18, s20 -; CHECK-NEXT: lsr.w r2, r6, #14 -; CHECK-NEXT: orr.w r2, r2, r0, lsl #4 -; CHECK-NEXT: str.w r2, [r10, #12] ; CHECK-NEXT: it lt ; CHECK-NEXT: movlt r1, #0 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr -; CHECK-NEXT: vcmp.f32 s22, #0 +; CHECK-NEXT: vcmp.f32 s16, #0 ; CHECK-NEXT: itt gt ; CHECK-NEXT: movwgt r1, #65535 ; CHECK-NEXT: movtgt r1, #3 -; CHECK-NEXT: lsr.w r2, r8, #14 +; CHECK-NEXT: bfc r2, #18, #14 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: it lt -; CHECK-NEXT: movlt r4, #0 -; CHECK-NEXT: vcmp.f32 s22, s20 +; CHECK-NEXT: movlt.w r10, #0 +; CHECK-NEXT: vcmp.f32 s16, s20 ; CHECK-NEXT: vmrs APSR_nzcv, fpscr ; CHECK-NEXT: itt gt -; CHECK-NEXT: movwgt r4, #65535 -; CHECK-NEXT: movtgt r4, #3 -; CHECK-NEXT: orr.w r2, r2, r6, lsl #18 -; CHECK-NEXT: str.w r2, [r10, #8] -; CHECK-NEXT: bfc r4, #18, #14 -; CHECK-NEXT: ldr r3, [sp, #4] @ 4-byte Reload -; CHECK-NEXT: lsrs r2, r7, #28 +; CHECK-NEXT: movwgt r10, #65535 +; CHECK-NEXT: movtgt r10, #3 +; CHECK-NEXT: orr.w r2, r2, r8, lsl #18 +; CHECK-NEXT: str r2, [r4, #4] +; CHECK-NEXT: bfc r10, #18, #14 +; CHECK-NEXT: ldr r3, [sp, #12] @ 4-byte Reload +; CHECK-NEXT: lsrs r2, r6, #28 ; CHECK-NEXT: bfc r1, #18, #14 -; CHECK-NEXT: orr.w r2, r2, r4, lsl #4 +; CHECK-NEXT: orr.w r2, r2, r10, lsl #4 ; CHECK-NEXT: lsrs r0, r0, #28 ; CHECK-NEXT: orr.w r2, r2, r3, lsl #22 -; CHECK-NEXT: str.w r2, [r10, #41] +; CHECK-NEXT: str.w r2, [r4, #41] ; CHECK-NEXT: orr.w r0, r0, r1, lsl #4 ; CHECK-NEXT: ldr r1, [sp] @ 4-byte Reload ; CHECK-NEXT: orr.w r0, r0, r1, lsl #22 -; CHECK-NEXT: str.w r0, [r10, #16] -; CHECK-NEXT: add sp, #8 -; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14} +; CHECK-NEXT: str r0, [r4, #16] +; CHECK-NEXT: add sp, #16 +; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13} ; CHECK-NEXT: add sp, #4 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} ; CHECK-NEXT: .p2align 2 diff --git a/llvm/test/CodeGen/X86/load-local-v3i129.ll b/llvm/test/CodeGen/X86/load-local-v3i129.ll --- a/llvm/test/CodeGen/X86/load-local-v3i129.ll +++ b/llvm/test/CodeGen/X86/load-local-v3i129.ll @@ -7,12 +7,14 @@ ; FAST-SHLD: # %bb.0: # %Entry ; FAST-SHLD-NEXT: movq -40(%rsp), %rax ; FAST-SHLD-NEXT: movq -32(%rsp), %rcx -; FAST-SHLD-NEXT: shrdq $2, %rcx, %rax +; FAST-SHLD-NEXT: movq %rcx, %rdx +; FAST-SHLD-NEXT: shlq $62, %rdx ; FAST-SHLD-NEXT: shrq $2, %rcx -; FAST-SHLD-NEXT: leaq 1(,%rax,4), %rdx -; FAST-SHLD-NEXT: movq %rdx, -40(%rsp) -; FAST-SHLD-NEXT: shrdq $62, %rcx, %rax -; FAST-SHLD-NEXT: movq %rax, -32(%rsp) +; FAST-SHLD-NEXT: shldq $2, %rdx, %rcx +; FAST-SHLD-NEXT: andq $-4, %rax +; FAST-SHLD-NEXT: orq $1, %rax +; FAST-SHLD-NEXT: movq %rax, -40(%rsp) +; FAST-SHLD-NEXT: movq %rcx, -32(%rsp) ; FAST-SHLD-NEXT: orq $-2, -56(%rsp) ; FAST-SHLD-NEXT: movq $-1, -48(%rsp) ; FAST-SHLD-NEXT: retq @@ -20,17 +22,9 @@ ; SLOW-SHLD-LABEL: _start: ; SLOW-SHLD: # %bb.0: # %Entry ; SLOW-SHLD-NEXT: movq -40(%rsp), %rax -; SLOW-SHLD-NEXT: movq -32(%rsp), %rcx -; SLOW-SHLD-NEXT: shrq $2, %rax -; SLOW-SHLD-NEXT: movq %rcx, %rdx -; SLOW-SHLD-NEXT: shlq $62, %rdx -; SLOW-SHLD-NEXT: orq %rax, %rdx -; SLOW-SHLD-NEXT: andq $-4, %rcx -; SLOW-SHLD-NEXT: leaq 1(,%rdx,4), %rax +; SLOW-SHLD-NEXT: andq $-4, %rax +; SLOW-SHLD-NEXT: orq $1, %rax ; SLOW-SHLD-NEXT: movq %rax, -40(%rsp) -; SLOW-SHLD-NEXT: shrq $62, %rdx -; SLOW-SHLD-NEXT: orq %rcx, %rdx -; SLOW-SHLD-NEXT: movq %rdx, -32(%rsp) ; SLOW-SHLD-NEXT: orq $-2, -56(%rsp) ; SLOW-SHLD-NEXT: movq $-1, -48(%rsp) ; SLOW-SHLD-NEXT: retq diff --git a/llvm/test/CodeGen/X86/mul128.ll b/llvm/test/CodeGen/X86/mul128.ll --- a/llvm/test/CodeGen/X86/mul128.ll +++ b/llvm/test/CodeGen/X86/mul128.ll @@ -107,12 +107,12 @@ define void @PR13897() nounwind { ; X64-LABEL: PR13897: ; X64: # %bb.0: # %"0x0" -; X64-NEXT: movl bbb(%rip), %eax -; X64-NEXT: movq %rax, %rcx -; X64-NEXT: shlq $32, %rcx -; X64-NEXT: orq %rax, %rcx -; X64-NEXT: movq %rcx, aaa+8(%rip) -; X64-NEXT: movq %rcx, aaa(%rip) +; X64-NEXT: movq bbb(%rip), %rax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: shlq $32, %rax +; X64-NEXT: orq %rcx, %rax +; X64-NEXT: movq %rax, aaa+8(%rip) +; X64-NEXT: movq %rax, aaa(%rip) ; X64-NEXT: retq ; ; X86-LABEL: PR13897: diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll --- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll @@ -285,15 +285,14 @@ ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movzwl %cx, %ecx -; X86-NEXT: addl %ecx, %ecx ; X86-NEXT: movl %ecx, %edx -; X86-NEXT: shrl $16, %edx -; X86-NEXT: shll $16, %ecx +; X86-NEXT: shll $17, %edx +; X86-NEXT: shrl $15, %ecx +; X86-NEXT: andl $1, %ecx ; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax -; X86-NEXT: pushl %edx ; X86-NEXT: pushl %ecx +; X86-NEXT: pushl %edx ; X86-NEXT: calll __udivdi3 ; X86-NEXT: addl $16, %esp ; X86-NEXT: cmpl $131071, %eax # imm = 0x1FFFF