diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -2790,6 +2790,17 @@ Amt, DAG.getConstant(0, dl, ShTy), ISD::SETEQ); + SDValue NVTBitsM1 = DAG.getConstant(NVTBits - 1, dl, ShTy); + + // Mask all shift amounts to be in a valid range. + // TODO: This is only really needed if the shift will be expanded to a libcall + // later. Otherwise an out of bounds shift produces poison, but the select + // won't let it propagate. + assert(isPowerOf2_32(NVTBits) && "Unexpected VT"); + AmtExcess = DAG.getNode(ISD::AND, dl, ShTy, AmtExcess, NVTBitsM1); + AmtLack = DAG.getNode(ISD::AND, dl, ShTy, AmtLack, NVTBitsM1); + Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, NVTBitsM1); + SDValue LoS, HiS, LoL, HiL; switch (N->getOpcode()) { default: llvm_unreachable("Unknown shift"); @@ -2833,8 +2844,7 @@ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack)); // Long: ShAmt >= NVTBits - HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part. - DAG.getConstant(NVTBits - 1, dl, ShTy)); + HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, NVTBitsM1); // Sign of Hi part. LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. 
Lo = DAG.getSelect(dl, NVT, isZero, InL, diff --git a/llvm/test/CodeGen/AMDGPU/shift-i128.ll b/llvm/test/CodeGen/AMDGPU/shift-i128.ll --- a/llvm/test/CodeGen/AMDGPU/shift-i128.ll +++ b/llvm/test/CodeGen/AMDGPU/shift-i128.ll @@ -5,20 +5,20 @@ ; GCN-LABEL: v_shl_i128_vv: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_sub_i32_e32 v7, vcc, 64, v4 -; GCN-NEXT: v_lshl_b64 v[5:6], v[2:3], v4 -; GCN-NEXT: v_lshr_b64 v[7:8], v[0:1], v7 +; GCN-NEXT: v_sub_i32_e32 v9, vcc, 64, v4 +; GCN-NEXT: v_lshr_b64 v[5:6], v[0:1], v9 +; GCN-NEXT: v_lshl_b64 v[7:8], v[2:3], v9 ; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 -; GCN-NEXT: v_or_b32_e32 v7, v5, v7 +; GCN-NEXT: v_or_b32_e32 v7, v7, v5 ; GCN-NEXT: v_subrev_i32_e32 v5, vcc, 64, v4 -; GCN-NEXT: v_or_b32_e32 v8, v6, v8 +; GCN-NEXT: v_or_b32_e32 v8, v8, v6 ; GCN-NEXT: v_lshl_b64 v[5:6], v[0:1], v5 ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4 +; GCN-NEXT: v_lshl_b64 v[0:1], v[0:1], v9 ; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GCN-NEXT: v_lshl_b64 v[0:1], v[0:1], v4 +; GCN-NEXT: v_cndmask_b32_e32 v4, v6, v8, vcc ; GCN-NEXT: v_cndmask_b32_e64 v2, v5, v2, s[4:5] -; GCN-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GCN-NEXT: v_cndmask_b32_e64 v3, v5, v3, s[4:5] +; GCN-NEXT: v_cndmask_b32_e64 v3, v4, v3, s[4:5] ; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -30,20 +30,20 @@ ; GCN-LABEL: v_lshr_i128_vv: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_sub_i32_e32 v7, vcc, 64, v4 -; GCN-NEXT: v_lshr_b64 v[5:6], v[0:1], v4 -; GCN-NEXT: v_lshl_b64 v[7:8], v[2:3], v7 +; GCN-NEXT: v_sub_i32_e32 v9, vcc, 64, v4 +; GCN-NEXT: v_lshl_b64 v[5:6], v[2:3], v9 +; GCN-NEXT: v_lshr_b64 v[7:8], v[0:1], v9 ; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 -; GCN-NEXT: v_or_b32_e32 v7, v5, v7 +; GCN-NEXT: v_or_b32_e32 v7, v7, v5 ; GCN-NEXT: v_subrev_i32_e32 v5, vcc, 64, v4 -; GCN-NEXT: v_or_b32_e32 v8, v6, v8 +; GCN-NEXT: 
v_or_b32_e32 v8, v8, v6 ; GCN-NEXT: v_lshr_b64 v[5:6], v[2:3], v5 ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4 +; GCN-NEXT: v_lshr_b64 v[2:3], v[2:3], v9 ; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc -; GCN-NEXT: v_lshr_b64 v[2:3], v[2:3], v4 +; GCN-NEXT: v_cndmask_b32_e32 v4, v6, v8, vcc ; GCN-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] -; GCN-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GCN-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5] +; GCN-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5] ; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; GCN-NEXT: s_setpc_b64 s[30:31] @@ -56,20 +56,20 @@ ; GCN-LABEL: v_ashr_i128_vv: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_sub_i32_e32 v7, vcc, 64, v4 -; GCN-NEXT: v_lshr_b64 v[5:6], v[0:1], v4 -; GCN-NEXT: v_lshl_b64 v[7:8], v[2:3], v7 +; GCN-NEXT: v_sub_i32_e32 v9, vcc, 64, v4 +; GCN-NEXT: v_lshl_b64 v[5:6], v[2:3], v9 +; GCN-NEXT: v_lshr_b64 v[7:8], v[0:1], v9 ; GCN-NEXT: v_cmp_eq_u32_e64 s[4:5], 0, v4 -; GCN-NEXT: v_or_b32_e32 v7, v5, v7 +; GCN-NEXT: v_or_b32_e32 v7, v7, v5 ; GCN-NEXT: v_subrev_i32_e32 v5, vcc, 64, v4 -; GCN-NEXT: v_or_b32_e32 v8, v6, v8 +; GCN-NEXT: v_or_b32_e32 v8, v8, v6 ; GCN-NEXT: v_ashr_i64 v[5:6], v[2:3], v5 ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v4 ; GCN-NEXT: v_cndmask_b32_e32 v5, v5, v7, vcc +; GCN-NEXT: v_cndmask_b32_e32 v4, v6, v8, vcc ; GCN-NEXT: v_cndmask_b32_e64 v0, v5, v0, s[4:5] -; GCN-NEXT: v_cndmask_b32_e32 v5, v6, v8, vcc -; GCN-NEXT: v_cndmask_b32_e64 v1, v5, v1, s[4:5] -; GCN-NEXT: v_ashr_i64 v[4:5], v[2:3], v4 +; GCN-NEXT: v_cndmask_b32_e64 v1, v4, v1, s[4:5] +; GCN-NEXT: v_ashr_i64 v[4:5], v[2:3], v9 ; GCN-NEXT: v_ashrrev_i32_e32 v3, 31, v3 ; GCN-NEXT: v_cndmask_b32_e32 v2, v3, v4, vcc ; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc @@ -125,15 +125,15 @@ ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GCN-NEXT: v_sub_i32_e32 v1, vcc, 64, v0 +; GCN-NEXT: v_subrev_i32_e32 v4, vcc, 64, v0 ; GCN-NEXT: 
v_lshr_b64 v[2:3], 17, v1 -; GCN-NEXT: v_subrev_i32_e32 v1, vcc, 64, v0 -; GCN-NEXT: v_lshl_b64 v[4:5], 17, v1 +; GCN-NEXT: v_lshl_b64 v[4:5], 17, v4 ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v0 -; GCN-NEXT: v_cndmask_b32_e32 v1, v4, v2, vcc ; GCN-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 -; GCN-NEXT: v_cndmask_b32_e64 v2, 0, v1, s[4:5] -; GCN-NEXT: v_lshl_b64 v[0:1], 17, v0 +; GCN-NEXT: v_lshl_b64 v[0:1], 17, v1 +; GCN-NEXT: v_cndmask_b32_e32 v2, v4, v2, vcc ; GCN-NEXT: v_cndmask_b32_e32 v3, v5, v3, vcc +; GCN-NEXT: v_cndmask_b32_e64 v2, 0, v2, s[4:5] ; GCN-NEXT: v_cndmask_b32_e64 v3, 0, v3, s[4:5] ; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -146,8 +146,9 @@ ; GCN-LABEL: v_lshr_i128_kv: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) +; GCN-NEXT: v_sub_i32_e32 v1, vcc, 64, v0 ; GCN-NEXT: s_mov_b64 s[4:5], 0x41 -; GCN-NEXT: v_lshr_b64 v[1:2], s[4:5], v0 +; GCN-NEXT: v_lshr_b64 v[1:2], s[4:5], v1 ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v0 ; GCN-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -166,7 +167,8 @@ ; GCN-LABEL: v_ashr_i128_kv: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_lshr_b64 v[1:2], 33, v0 +; GCN-NEXT: v_sub_i32_e32 v1, vcc, 64, v0 +; GCN-NEXT: v_lshr_b64 v[1:2], 33, v1 ; GCN-NEXT: v_cmp_gt_u32_e32 vcc, 64, v0 ; GCN-NEXT: v_cmp_ne_u32_e64 s[4:5], 0, v0 ; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc @@ -187,18 +189,18 @@ ; GCN-NEXT: v_mov_b32_e32 v4, 0 ; GCN-NEXT: v_mov_b32_e32 v5, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_sub_i32 s5, s4, 64 -; GCN-NEXT: s_sub_i32 s12, 64, s4 -; GCN-NEXT: s_lshl_b64 s[6:7], s[2:3], s4 -; GCN-NEXT: s_lshl_b64 s[8:9], s[0:1], s4 +; GCN-NEXT: s_sub_i32 s5, 64, s4 +; GCN-NEXT: s_sub_i32 s12, s4, 64 +; GCN-NEXT: s_lshr_b64 s[6:7], s[0:1], s5 +; GCN-NEXT: s_lshl_b64 s[8:9], s[2:3], s5 ; GCN-NEXT: s_lshl_b64 s[10:11], s[0:1], s5 -; GCN-NEXT: s_lshr_b64 s[0:1], s[0:1], s12 -; GCN-NEXT: 
s_or_b64 s[0:1], s[6:7], s[0:1] +; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], s12 +; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] ; GCN-NEXT: s_cmp_lt_u32 s4, 64 -; GCN-NEXT: s_cselect_b32 s0, s0, s10 -; GCN-NEXT: s_cselect_b32 s1, s1, s11 -; GCN-NEXT: s_cselect_b32 s5, s9, 0 -; GCN-NEXT: s_cselect_b32 s6, s8, 0 +; GCN-NEXT: s_cselect_b32 s0, s6, s0 +; GCN-NEXT: s_cselect_b32 s1, s7, s1 +; GCN-NEXT: s_cselect_b32 s5, s11, 0 +; GCN-NEXT: s_cselect_b32 s6, s10, 0 ; GCN-NEXT: s_cmp_eq_u32 s4, 0 ; GCN-NEXT: s_cselect_b32 s1, s3, s1 ; GCN-NEXT: s_cselect_b32 s0, s2, s0 @@ -220,18 +222,18 @@ ; GCN-NEXT: v_mov_b32_e32 v4, 0 ; GCN-NEXT: v_mov_b32_e32 v5, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) -; GCN-NEXT: s_sub_i32 s5, s4, 64 -; GCN-NEXT: s_sub_i32 s12, 64, s4 -; GCN-NEXT: s_lshr_b64 s[6:7], s[0:1], s4 -; GCN-NEXT: s_lshr_b64 s[8:9], s[2:3], s4 +; GCN-NEXT: s_sub_i32 s5, 64, s4 +; GCN-NEXT: s_sub_i32 s12, s4, 64 +; GCN-NEXT: s_lshl_b64 s[6:7], s[2:3], s5 +; GCN-NEXT: s_lshr_b64 s[8:9], s[0:1], s5 ; GCN-NEXT: s_lshr_b64 s[10:11], s[2:3], s5 -; GCN-NEXT: s_lshl_b64 s[2:3], s[2:3], s12 -; GCN-NEXT: s_or_b64 s[2:3], s[6:7], s[2:3] +; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], s12 +; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] ; GCN-NEXT: s_cmp_lt_u32 s4, 64 -; GCN-NEXT: s_cselect_b32 s2, s2, s10 -; GCN-NEXT: s_cselect_b32 s3, s3, s11 -; GCN-NEXT: s_cselect_b32 s5, s9, 0 -; GCN-NEXT: s_cselect_b32 s6, s8, 0 +; GCN-NEXT: s_cselect_b32 s2, s6, s2 +; GCN-NEXT: s_cselect_b32 s3, s7, s3 +; GCN-NEXT: s_cselect_b32 s5, s11, 0 +; GCN-NEXT: s_cselect_b32 s6, s10, 0 ; GCN-NEXT: s_cmp_eq_u32 s4, 0 ; GCN-NEXT: s_cselect_b32 s1, s1, s3 ; GCN-NEXT: s_cselect_b32 s0, s0, s2 @@ -254,25 +256,25 @@ ; GCN-NEXT: v_mov_b32_e32 v5, 0 ; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: s_sub_i32 s5, 64, s4 -; GCN-NEXT: s_lshr_b64 s[6:7], s[0:1], s4 -; GCN-NEXT: s_sub_i32 s10, s4, 64 -; GCN-NEXT: s_lshl_b64 s[8:9], s[2:3], s5 -; GCN-NEXT: s_ashr_i32 s12, s3, 31 -; GCN-NEXT: s_ashr_i64 s[10:11], s[2:3], s10 -; GCN-NEXT: s_or_b64 
s[6:7], s[6:7], s[8:9] -; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], s4 +; GCN-NEXT: s_sub_i32 s12, s4, 64 +; GCN-NEXT: s_lshl_b64 s[6:7], s[2:3], s5 +; GCN-NEXT: s_lshr_b64 s[8:9], s[0:1], s5 +; GCN-NEXT: s_ashr_i64 s[10:11], s[2:3], s5 +; GCN-NEXT: s_ashr_i64 s[12:13], s[2:3], s12 +; GCN-NEXT: s_or_b64 s[6:7], s[8:9], s[6:7] +; GCN-NEXT: s_ashr_i32 s2, s3, 31 ; GCN-NEXT: s_cmp_lt_u32 s4, 64 -; GCN-NEXT: s_cselect_b32 s3, s3, s12 -; GCN-NEXT: s_cselect_b32 s2, s2, s12 -; GCN-NEXT: s_cselect_b32 s5, s6, s10 -; GCN-NEXT: s_cselect_b32 s6, s7, s11 +; GCN-NEXT: s_cselect_b32 s3, s6, s12 +; GCN-NEXT: s_cselect_b32 s5, s7, s13 +; GCN-NEXT: s_cselect_b32 s6, s11, s2 +; GCN-NEXT: s_cselect_b32 s2, s10, s2 ; GCN-NEXT: s_cmp_eq_u32 s4, 0 -; GCN-NEXT: s_cselect_b32 s1, s1, s6 -; GCN-NEXT: s_cselect_b32 s0, s0, s5 +; GCN-NEXT: s_cselect_b32 s1, s1, s5 +; GCN-NEXT: s_cselect_b32 s0, s0, s3 ; GCN-NEXT: v_mov_b32_e32 v0, s0 ; GCN-NEXT: v_mov_b32_e32 v1, s1 ; GCN-NEXT: v_mov_b32_e32 v2, s2 -; GCN-NEXT: v_mov_b32_e32 v3, s3 +; GCN-NEXT: v_mov_b32_e32 v3, s6 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm %shift = ashr i128 %lhs, %rhs @@ -284,42 +286,42 @@ ; GCN-LABEL: v_shl_v2i128_vv: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_sub_i32_e32 v16, vcc, 64, v8 -; GCN-NEXT: v_lshr_b64 v[16:17], v[0:1], v16 -; GCN-NEXT: v_lshl_b64 v[18:19], v[2:3], v8 -; GCN-NEXT: v_cmp_gt_u64_e32 vcc, 64, v[8:9] -; GCN-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[10:11] -; GCN-NEXT: v_or_b32_e32 v11, v9, v11 -; GCN-NEXT: v_subrev_i32_e64 v9, s[6:7], 64, v8 -; GCN-NEXT: v_or_b32_e32 v19, v19, v17 -; GCN-NEXT: v_or_b32_e32 v18, v18, v16 +; GCN-NEXT: v_sub_i32_e32 v20, vcc, 64, v8 +; GCN-NEXT: v_lshr_b64 v[16:17], v[0:1], v20 +; GCN-NEXT: v_lshl_b64 v[18:19], v[2:3], v20 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] ; GCN-NEXT: v_or_b32_e32 v10, v8, v10 -; GCN-NEXT: v_lshl_b64 v[16:17], v[0:1], v9 -; GCN-NEXT: s_and_b64 vcc, s[4:5], vcc +; GCN-NEXT: 
v_cmp_gt_u64_e64 s[4:5], 64, v[8:9] +; GCN-NEXT: v_subrev_i32_e64 v8, s[6:7], 64, v8 +; GCN-NEXT: v_or_b32_e32 v11, v9, v11 +; GCN-NEXT: v_lshl_b64 v[8:9], v[0:1], v8 +; GCN-NEXT: v_or_b32_e32 v17, v19, v17 +; GCN-NEXT: v_or_b32_e32 v16, v18, v16 +; GCN-NEXT: s_and_b64 vcc, vcc, s[4:5] ; GCN-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[10:11] -; GCN-NEXT: v_cndmask_b32_e32 v9, v16, v18, vcc -; GCN-NEXT: v_cndmask_b32_e64 v2, v9, v2, s[4:5] -; GCN-NEXT: v_sub_i32_e64 v9, s[6:7], 64, v12 -; GCN-NEXT: v_cndmask_b32_e32 v11, v17, v19, vcc -; GCN-NEXT: v_lshr_b64 v[9:10], v[4:5], v9 -; GCN-NEXT: v_lshl_b64 v[16:17], v[6:7], v12 -; GCN-NEXT: v_cndmask_b32_e64 v3, v11, v3, s[4:5] -; GCN-NEXT: v_or_b32_e32 v16, v16, v9 +; GCN-NEXT: v_cndmask_b32_e32 v8, v8, v16, vcc +; GCN-NEXT: v_cndmask_b32_e32 v16, v9, v17, vcc +; GCN-NEXT: v_sub_i32_e64 v17, s[6:7], 64, v12 +; GCN-NEXT: v_cndmask_b32_e64 v2, v8, v2, s[4:5] +; GCN-NEXT: v_lshr_b64 v[8:9], v[4:5], v17 +; GCN-NEXT: v_lshl_b64 v[10:11], v[6:7], v17 +; GCN-NEXT: v_cndmask_b32_e64 v3, v16, v3, s[4:5] +; GCN-NEXT: v_or_b32_e32 v10, v10, v8 ; GCN-NEXT: v_cmp_gt_u64_e64 s[4:5], 64, v[12:13] ; GCN-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[14:15] -; GCN-NEXT: v_subrev_i32_e64 v9, s[8:9], 64, v12 -; GCN-NEXT: v_or_b32_e32 v11, v17, v10 -; GCN-NEXT: v_lshl_b64 v[9:10], v[4:5], v9 -; GCN-NEXT: v_or_b32_e32 v15, v13, v15 -; GCN-NEXT: v_or_b32_e32 v14, v12, v14 +; GCN-NEXT: v_subrev_i32_e64 v8, s[8:9], 64, v12 +; GCN-NEXT: v_or_b32_e32 v16, v11, v9 +; GCN-NEXT: v_lshl_b64 v[8:9], v[4:5], v8 ; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] -; GCN-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[14:15] -; GCN-NEXT: v_cndmask_b32_e64 v9, v9, v16, s[4:5] -; GCN-NEXT: v_lshl_b64 v[0:1], v[0:1], v8 -; GCN-NEXT: v_lshl_b64 v[4:5], v[4:5], v12 -; GCN-NEXT: v_cndmask_b32_e64 v6, v9, v6, s[6:7] -; GCN-NEXT: v_cndmask_b32_e64 v9, v10, v11, s[4:5] -; GCN-NEXT: v_cndmask_b32_e64 v7, v9, v7, s[6:7] +; GCN-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] +; GCN-NEXT: v_or_b32_e32 v11, 
v13, v15 +; GCN-NEXT: v_or_b32_e32 v10, v12, v14 +; GCN-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[10:11] +; GCN-NEXT: v_lshl_b64 v[0:1], v[0:1], v20 +; GCN-NEXT: v_lshl_b64 v[4:5], v[4:5], v17 +; GCN-NEXT: v_cndmask_b32_e64 v6, v8, v6, s[6:7] +; GCN-NEXT: v_cndmask_b32_e64 v8, v9, v16, s[4:5] +; GCN-NEXT: v_cndmask_b32_e64 v7, v8, v7, s[6:7] ; GCN-NEXT: v_cndmask_b32_e32 v0, 0, v0, vcc ; GCN-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc ; GCN-NEXT: v_cndmask_b32_e64 v4, 0, v4, s[4:5] @@ -333,42 +335,42 @@ ; GCN-LABEL: v_lshr_v2i128_vv: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_sub_i32_e32 v16, vcc, 64, v8 -; GCN-NEXT: v_lshl_b64 v[16:17], v[2:3], v16 -; GCN-NEXT: v_lshr_b64 v[18:19], v[0:1], v8 -; GCN-NEXT: v_cmp_gt_u64_e32 vcc, 64, v[8:9] -; GCN-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[10:11] -; GCN-NEXT: v_or_b32_e32 v11, v9, v11 -; GCN-NEXT: v_subrev_i32_e64 v9, s[6:7], 64, v8 -; GCN-NEXT: v_or_b32_e32 v19, v19, v17 -; GCN-NEXT: v_or_b32_e32 v18, v18, v16 +; GCN-NEXT: v_sub_i32_e32 v20, vcc, 64, v8 +; GCN-NEXT: v_lshl_b64 v[16:17], v[2:3], v20 +; GCN-NEXT: v_lshr_b64 v[18:19], v[0:1], v20 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] ; GCN-NEXT: v_or_b32_e32 v10, v8, v10 -; GCN-NEXT: v_lshr_b64 v[16:17], v[2:3], v9 -; GCN-NEXT: s_and_b64 vcc, s[4:5], vcc +; GCN-NEXT: v_cmp_gt_u64_e64 s[4:5], 64, v[8:9] +; GCN-NEXT: v_subrev_i32_e64 v8, s[6:7], 64, v8 +; GCN-NEXT: v_or_b32_e32 v11, v9, v11 +; GCN-NEXT: v_lshr_b64 v[8:9], v[2:3], v8 +; GCN-NEXT: v_or_b32_e32 v17, v19, v17 +; GCN-NEXT: v_or_b32_e32 v16, v18, v16 +; GCN-NEXT: s_and_b64 vcc, vcc, s[4:5] ; GCN-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[10:11] -; GCN-NEXT: v_cndmask_b32_e32 v9, v16, v18, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, v9, v0, s[4:5] -; GCN-NEXT: v_sub_i32_e64 v9, s[6:7], 64, v12 -; GCN-NEXT: v_cndmask_b32_e32 v11, v17, v19, vcc -; GCN-NEXT: v_lshl_b64 v[9:10], v[6:7], v9 -; GCN-NEXT: v_lshr_b64 v[16:17], v[4:5], v12 -; GCN-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5] -; 
GCN-NEXT: v_or_b32_e32 v16, v16, v9 +; GCN-NEXT: v_cndmask_b32_e32 v8, v8, v16, vcc +; GCN-NEXT: v_cndmask_b32_e32 v16, v9, v17, vcc +; GCN-NEXT: v_sub_i32_e64 v17, s[6:7], 64, v12 +; GCN-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5] +; GCN-NEXT: v_lshl_b64 v[8:9], v[6:7], v17 +; GCN-NEXT: v_lshr_b64 v[10:11], v[4:5], v17 +; GCN-NEXT: v_cndmask_b32_e64 v1, v16, v1, s[4:5] +; GCN-NEXT: v_or_b32_e32 v10, v10, v8 ; GCN-NEXT: v_cmp_gt_u64_e64 s[4:5], 64, v[12:13] ; GCN-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[14:15] -; GCN-NEXT: v_subrev_i32_e64 v9, s[8:9], 64, v12 -; GCN-NEXT: v_or_b32_e32 v11, v17, v10 -; GCN-NEXT: v_lshr_b64 v[9:10], v[6:7], v9 -; GCN-NEXT: v_or_b32_e32 v15, v13, v15 -; GCN-NEXT: v_or_b32_e32 v14, v12, v14 +; GCN-NEXT: v_subrev_i32_e64 v8, s[8:9], 64, v12 +; GCN-NEXT: v_or_b32_e32 v16, v11, v9 +; GCN-NEXT: v_lshr_b64 v[8:9], v[6:7], v8 ; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] -; GCN-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[14:15] -; GCN-NEXT: v_cndmask_b32_e64 v9, v9, v16, s[4:5] -; GCN-NEXT: v_lshr_b64 v[2:3], v[2:3], v8 -; GCN-NEXT: v_lshr_b64 v[6:7], v[6:7], v12 -; GCN-NEXT: v_cndmask_b32_e64 v4, v9, v4, s[6:7] -; GCN-NEXT: v_cndmask_b32_e64 v9, v10, v11, s[4:5] -; GCN-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] +; GCN-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] +; GCN-NEXT: v_or_b32_e32 v11, v13, v15 +; GCN-NEXT: v_or_b32_e32 v10, v12, v14 +; GCN-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[10:11] +; GCN-NEXT: v_lshr_b64 v[2:3], v[2:3], v20 +; GCN-NEXT: v_lshr_b64 v[6:7], v[6:7], v17 +; GCN-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7] +; GCN-NEXT: v_cndmask_b32_e64 v8, v9, v16, s[4:5] +; GCN-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[6:7] ; GCN-NEXT: v_cndmask_b32_e32 v2, 0, v2, vcc ; GCN-NEXT: v_cndmask_b32_e32 v3, 0, v3, vcc ; GCN-NEXT: v_cndmask_b32_e64 v6, 0, v6, s[4:5] @@ -382,45 +384,45 @@ ; GCN-LABEL: v_ashr_v2i128_vv: ; GCN: ; %bb.0: ; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GCN-NEXT: v_sub_i32_e32 v16, vcc, 64, v8 -; GCN-NEXT: v_lshl_b64 v[16:17], 
v[2:3], v16 -; GCN-NEXT: v_lshr_b64 v[18:19], v[0:1], v8 -; GCN-NEXT: v_cmp_gt_u64_e32 vcc, 64, v[8:9] -; GCN-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[10:11] -; GCN-NEXT: v_or_b32_e32 v11, v9, v11 -; GCN-NEXT: v_subrev_i32_e64 v9, s[6:7], 64, v8 -; GCN-NEXT: v_or_b32_e32 v19, v19, v17 -; GCN-NEXT: v_or_b32_e32 v18, v18, v16 +; GCN-NEXT: v_sub_i32_e32 v20, vcc, 64, v8 +; GCN-NEXT: v_lshl_b64 v[16:17], v[2:3], v20 +; GCN-NEXT: v_lshr_b64 v[18:19], v[0:1], v20 +; GCN-NEXT: v_cmp_eq_u64_e32 vcc, 0, v[10:11] ; GCN-NEXT: v_or_b32_e32 v10, v8, v10 -; GCN-NEXT: v_ashr_i64 v[16:17], v[2:3], v9 -; GCN-NEXT: s_and_b64 vcc, s[4:5], vcc +; GCN-NEXT: v_cmp_gt_u64_e64 s[4:5], 64, v[8:9] +; GCN-NEXT: v_subrev_i32_e64 v8, s[6:7], 64, v8 +; GCN-NEXT: v_or_b32_e32 v11, v9, v11 +; GCN-NEXT: v_ashr_i64 v[8:9], v[2:3], v8 +; GCN-NEXT: v_or_b32_e32 v17, v19, v17 +; GCN-NEXT: v_or_b32_e32 v16, v18, v16 +; GCN-NEXT: s_and_b64 vcc, vcc, s[4:5] ; GCN-NEXT: v_cmp_eq_u64_e64 s[4:5], 0, v[10:11] -; GCN-NEXT: v_cndmask_b32_e32 v9, v16, v18, vcc -; GCN-NEXT: v_cndmask_b32_e64 v0, v9, v0, s[4:5] -; GCN-NEXT: v_sub_i32_e64 v9, s[6:7], 64, v12 -; GCN-NEXT: v_cndmask_b32_e32 v11, v17, v19, vcc -; GCN-NEXT: v_lshl_b64 v[9:10], v[6:7], v9 -; GCN-NEXT: v_lshr_b64 v[16:17], v[4:5], v12 -; GCN-NEXT: v_cndmask_b32_e64 v1, v11, v1, s[4:5] -; GCN-NEXT: v_or_b32_e32 v16, v16, v9 +; GCN-NEXT: v_cndmask_b32_e32 v8, v8, v16, vcc +; GCN-NEXT: v_cndmask_b32_e32 v16, v9, v17, vcc +; GCN-NEXT: v_sub_i32_e64 v17, s[6:7], 64, v12 +; GCN-NEXT: v_cndmask_b32_e64 v0, v8, v0, s[4:5] +; GCN-NEXT: v_lshl_b64 v[8:9], v[6:7], v17 +; GCN-NEXT: v_lshr_b64 v[10:11], v[4:5], v17 +; GCN-NEXT: v_cndmask_b32_e64 v1, v16, v1, s[4:5] +; GCN-NEXT: v_or_b32_e32 v10, v10, v8 ; GCN-NEXT: v_cmp_gt_u64_e64 s[4:5], 64, v[12:13] ; GCN-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[14:15] -; GCN-NEXT: v_subrev_i32_e64 v9, s[8:9], 64, v12 -; GCN-NEXT: v_or_b32_e32 v11, v17, v10 -; GCN-NEXT: v_ashr_i64 v[9:10], v[6:7], v9 -; GCN-NEXT: v_or_b32_e32 v15, v13, v15 
-; GCN-NEXT: v_or_b32_e32 v14, v12, v14 +; GCN-NEXT: v_subrev_i32_e64 v8, s[8:9], 64, v12 +; GCN-NEXT: v_or_b32_e32 v16, v11, v9 +; GCN-NEXT: v_ashr_i64 v[8:9], v[6:7], v8 ; GCN-NEXT: s_and_b64 s[4:5], s[6:7], s[4:5] -; GCN-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[14:15] -; GCN-NEXT: v_cndmask_b32_e64 v9, v9, v16, s[4:5] -; GCN-NEXT: v_cndmask_b32_e64 v4, v9, v4, s[6:7] -; GCN-NEXT: v_cndmask_b32_e64 v9, v10, v11, s[4:5] -; GCN-NEXT: v_cndmask_b32_e64 v5, v9, v5, s[6:7] -; GCN-NEXT: v_ashr_i64 v[8:9], v[2:3], v8 +; GCN-NEXT: v_cndmask_b32_e64 v8, v8, v10, s[4:5] +; GCN-NEXT: v_or_b32_e32 v11, v13, v15 +; GCN-NEXT: v_or_b32_e32 v10, v12, v14 +; GCN-NEXT: v_cmp_eq_u64_e64 s[6:7], 0, v[10:11] +; GCN-NEXT: v_cndmask_b32_e64 v4, v8, v4, s[6:7] +; GCN-NEXT: v_cndmask_b32_e64 v8, v9, v16, s[4:5] +; GCN-NEXT: v_cndmask_b32_e64 v5, v8, v5, s[6:7] +; GCN-NEXT: v_ashr_i64 v[8:9], v[2:3], v20 ; GCN-NEXT: v_ashrrev_i32_e32 v3, 31, v3 ; GCN-NEXT: v_cndmask_b32_e32 v2, v3, v8, vcc ; GCN-NEXT: v_cndmask_b32_e32 v3, v3, v9, vcc -; GCN-NEXT: v_ashr_i64 v[8:9], v[6:7], v12 +; GCN-NEXT: v_ashr_i64 v[8:9], v[6:7], v17 ; GCN-NEXT: v_ashrrev_i32_e32 v7, 31, v7 ; GCN-NEXT: v_cndmask_b32_e64 v6, v7, v8, s[4:5] ; GCN-NEXT: v_cndmask_b32_e64 v7, v7, v9, s[4:5] @@ -441,59 +443,59 @@ ; GCN-NEXT: v_cmp_lt_u64_e64 s[16:17], s[8:9], 64 ; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[10:11], 0 ; GCN-NEXT: s_sub_i32 s22, 64, s8 -; GCN-NEXT: s_sub_i32 s20, s8, 64 -; GCN-NEXT: s_lshr_b64 s[22:23], s[0:1], s22 +; GCN-NEXT: s_lshl_b64 s[20:21], s[0:1], s22 ; GCN-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-NEXT: s_lshl_b64 s[18:19], s[2:3], s8 -; GCN-NEXT: s_lshl_b64 s[20:21], s[0:1], s20 -; GCN-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23] -; GCN-NEXT: s_and_b64 s[22:23], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s19, s19, s21 -; GCN-NEXT: s_or_b64 s[10:11], s[8:9], s[10:11] -; GCN-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], 0 -; GCN-NEXT: s_and_b64 s[22:23], s[10:11], exec -; GCN-NEXT: s_cselect_b32 s9, s3, 
s19 -; GCN-NEXT: s_and_b64 s[22:23], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s3, s18, s20 -; GCN-NEXT: s_and_b64 s[10:11], s[10:11], exec -; GCN-NEXT: v_cmp_lt_u64_e64 s[10:11], s[12:13], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[14:15], 0 -; GCN-NEXT: s_cselect_b32 s22, s2, s3 -; GCN-NEXT: s_and_b64 s[2:3], s[18:19], s[10:11] -; GCN-NEXT: s_sub_i32 s18, 64, s12 -; GCN-NEXT: s_sub_i32 s10, s12, 64 -; GCN-NEXT: s_lshr_b64 s[18:19], s[4:5], s18 -; GCN-NEXT: s_lshl_b64 s[20:21], s[6:7], s12 -; GCN-NEXT: s_lshl_b64 s[10:11], s[4:5], s10 +; GCN-NEXT: s_and_b64 s[18:19], s[16:17], exec +; GCN-NEXT: s_cselect_b32 s23, s21, 0 +; GCN-NEXT: s_cselect_b32 s24, s20, 0 +; GCN-NEXT: s_lshr_b64 s[18:19], s[0:1], s22 +; GCN-NEXT: s_lshl_b64 s[20:21], s[2:3], s22 ; GCN-NEXT: s_or_b64 s[18:19], s[20:21], s[18:19] -; GCN-NEXT: s_and_b64 s[20:21], s[2:3], exec -; GCN-NEXT: s_cselect_b32 s11, s19, s11 -; GCN-NEXT: s_or_b64 s[14:15], s[12:13], s[14:15] -; GCN-NEXT: v_cmp_eq_u64_e64 s[14:15], s[14:15], 0 -; GCN-NEXT: s_and_b64 s[20:21], s[14:15], exec -; GCN-NEXT: s_cselect_b32 s13, s7, s11 -; GCN-NEXT: s_and_b64 s[20:21], s[2:3], exec -; GCN-NEXT: s_cselect_b32 s7, s18, s10 -; GCN-NEXT: s_and_b64 s[10:11], s[14:15], exec -; GCN-NEXT: s_cselect_b32 s10, s6, s7 -; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], s8 -; GCN-NEXT: s_and_b64 s[6:7], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s6, s1, 0 -; GCN-NEXT: s_cselect_b32 s7, s0, 0 -; GCN-NEXT: s_lshl_b64 s[0:1], s[4:5], s12 -; GCN-NEXT: s_and_b64 s[2:3], s[2:3], exec -; GCN-NEXT: s_cselect_b32 s1, s1, 0 -; GCN-NEXT: s_cselect_b32 s0, s0, 0 -; GCN-NEXT: v_mov_b32_e32 v0, s0 -; GCN-NEXT: v_mov_b32_e32 v1, s1 -; GCN-NEXT: v_mov_b32_e32 v2, s10 -; GCN-NEXT: v_mov_b32_e32 v3, s13 +; GCN-NEXT: s_sub_i32 s20, s8, 64 +; GCN-NEXT: s_lshl_b64 s[0:1], s[0:1], s20 +; GCN-NEXT: s_and_b64 s[20:21], s[16:17], exec +; GCN-NEXT: s_cselect_b32 s1, s19, s1 +; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] +; GCN-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 +; GCN-NEXT: 
s_and_b64 s[10:11], s[8:9], exec +; GCN-NEXT: s_cselect_b32 s19, s3, s1 +; GCN-NEXT: s_and_b64 s[10:11], s[16:17], exec +; GCN-NEXT: s_cselect_b32 s3, s18, s0 +; GCN-NEXT: s_and_b64 s[0:1], s[8:9], exec +; GCN-NEXT: v_cmp_lt_u64_e64 s[0:1], s[12:13], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[8:9], s[14:15], 0 +; GCN-NEXT: s_cselect_b32 s16, s2, s3 +; GCN-NEXT: s_sub_i32 s10, 64, s12 +; GCN-NEXT: s_and_b64 s[0:1], s[8:9], s[0:1] +; GCN-NEXT: s_lshl_b64 s[2:3], s[4:5], s10 +; GCN-NEXT: s_and_b64 s[8:9], s[0:1], exec +; GCN-NEXT: s_cselect_b32 s17, s3, 0 +; GCN-NEXT: s_cselect_b32 s18, s2, 0 +; GCN-NEXT: s_lshr_b64 s[2:3], s[4:5], s10 +; GCN-NEXT: s_lshl_b64 s[8:9], s[6:7], s10 +; GCN-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3] +; GCN-NEXT: s_sub_i32 s8, s12, 64 +; GCN-NEXT: s_lshl_b64 s[4:5], s[4:5], s8 +; GCN-NEXT: s_and_b64 s[8:9], s[0:1], exec +; GCN-NEXT: s_cselect_b32 s3, s3, s5 +; GCN-NEXT: s_or_b64 s[8:9], s[12:13], s[14:15] +; GCN-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 +; GCN-NEXT: v_mov_b32_e32 v0, s18 +; GCN-NEXT: s_and_b64 s[10:11], s[8:9], exec +; GCN-NEXT: s_cselect_b32 s3, s7, s3 +; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec +; GCN-NEXT: s_cselect_b32 s2, s2, s4 +; GCN-NEXT: s_and_b64 s[0:1], s[8:9], exec +; GCN-NEXT: s_cselect_b32 s0, s6, s2 +; GCN-NEXT: v_mov_b32_e32 v1, s17 +; GCN-NEXT: v_mov_b32_e32 v2, s0 +; GCN-NEXT: v_mov_b32_e32 v3, s3 ; GCN-NEXT: flat_store_dwordx4 v[6:7], v[0:3] ; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mov_b32_e32 v0, s7 -; GCN-NEXT: v_mov_b32_e32 v1, s6 -; GCN-NEXT: v_mov_b32_e32 v2, s22 -; GCN-NEXT: v_mov_b32_e32 v3, s9 +; GCN-NEXT: v_mov_b32_e32 v0, s24 +; GCN-NEXT: v_mov_b32_e32 v1, s23 +; GCN-NEXT: v_mov_b32_e32 v2, s16 +; GCN-NEXT: v_mov_b32_e32 v3, s19 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm %shift = shl <2 x i128> %lhs, %rhs @@ -513,59 +515,59 @@ ; GCN-NEXT: v_cmp_lt_u64_e64 s[16:17], s[8:9], 64 ; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[10:11], 0 ; GCN-NEXT: s_sub_i32 s22, 64, s8 -; GCN-NEXT: s_sub_i32 s20, s8, 
64 -; GCN-NEXT: s_lshl_b64 s[22:23], s[2:3], s22 +; GCN-NEXT: s_lshr_b64 s[20:21], s[2:3], s22 ; GCN-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-NEXT: s_lshr_b64 s[18:19], s[0:1], s8 -; GCN-NEXT: s_lshr_b64 s[20:21], s[2:3], s20 -; GCN-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23] -; GCN-NEXT: s_and_b64 s[22:23], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s19, s19, s21 -; GCN-NEXT: s_or_b64 s[10:11], s[8:9], s[10:11] -; GCN-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], 0 -; GCN-NEXT: s_and_b64 s[22:23], s[10:11], exec -; GCN-NEXT: s_cselect_b32 s9, s1, s19 -; GCN-NEXT: s_and_b64 s[22:23], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s1, s18, s20 -; GCN-NEXT: s_and_b64 s[10:11], s[10:11], exec -; GCN-NEXT: v_cmp_lt_u64_e64 s[10:11], s[12:13], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[14:15], 0 -; GCN-NEXT: s_cselect_b32 s22, s0, s1 -; GCN-NEXT: s_and_b64 s[0:1], s[18:19], s[10:11] -; GCN-NEXT: s_sub_i32 s18, 64, s12 -; GCN-NEXT: s_sub_i32 s10, s12, 64 -; GCN-NEXT: s_lshl_b64 s[18:19], s[6:7], s18 -; GCN-NEXT: s_lshr_b64 s[20:21], s[4:5], s12 -; GCN-NEXT: s_lshr_b64 s[10:11], s[6:7], s10 +; GCN-NEXT: s_and_b64 s[18:19], s[16:17], exec +; GCN-NEXT: s_cselect_b32 s23, s21, 0 +; GCN-NEXT: s_cselect_b32 s24, s20, 0 +; GCN-NEXT: s_lshl_b64 s[18:19], s[2:3], s22 +; GCN-NEXT: s_lshr_b64 s[20:21], s[0:1], s22 ; GCN-NEXT: s_or_b64 s[18:19], s[20:21], s[18:19] -; GCN-NEXT: s_and_b64 s[20:21], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s11, s19, s11 -; GCN-NEXT: s_or_b64 s[14:15], s[12:13], s[14:15] -; GCN-NEXT: v_cmp_eq_u64_e64 s[14:15], s[14:15], 0 -; GCN-NEXT: s_and_b64 s[20:21], s[14:15], exec -; GCN-NEXT: s_cselect_b32 s13, s5, s11 -; GCN-NEXT: s_and_b64 s[20:21], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s5, s18, s10 -; GCN-NEXT: s_and_b64 s[10:11], s[14:15], exec -; GCN-NEXT: s_cselect_b32 s10, s4, s5 -; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], s8 -; GCN-NEXT: s_and_b64 s[4:5], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s4, s3, 0 -; GCN-NEXT: s_cselect_b32 s5, s2, 0 -; GCN-NEXT: 
s_lshr_b64 s[2:3], s[6:7], s12 +; GCN-NEXT: s_sub_i32 s20, s8, 64 +; GCN-NEXT: s_lshr_b64 s[2:3], s[2:3], s20 +; GCN-NEXT: s_and_b64 s[20:21], s[16:17], exec +; GCN-NEXT: s_cselect_b32 s3, s19, s3 +; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] +; GCN-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 +; GCN-NEXT: s_and_b64 s[10:11], s[8:9], exec +; GCN-NEXT: s_cselect_b32 s19, s1, s3 +; GCN-NEXT: s_and_b64 s[10:11], s[16:17], exec +; GCN-NEXT: s_cselect_b32 s1, s18, s2 +; GCN-NEXT: s_and_b64 s[2:3], s[8:9], exec +; GCN-NEXT: v_cmp_lt_u64_e64 s[2:3], s[12:13], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[8:9], s[14:15], 0 +; GCN-NEXT: s_cselect_b32 s16, s0, s1 +; GCN-NEXT: s_sub_i32 s10, 64, s12 +; GCN-NEXT: s_and_b64 s[0:1], s[8:9], s[2:3] +; GCN-NEXT: s_lshr_b64 s[2:3], s[6:7], s10 +; GCN-NEXT: s_and_b64 s[8:9], s[0:1], exec +; GCN-NEXT: s_cselect_b32 s17, s3, 0 +; GCN-NEXT: s_cselect_b32 s18, s2, 0 +; GCN-NEXT: s_lshl_b64 s[2:3], s[6:7], s10 +; GCN-NEXT: s_lshr_b64 s[8:9], s[4:5], s10 +; GCN-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3] +; GCN-NEXT: s_sub_i32 s8, s12, 64 +; GCN-NEXT: s_lshr_b64 s[6:7], s[6:7], s8 +; GCN-NEXT: s_and_b64 s[8:9], s[0:1], exec +; GCN-NEXT: s_cselect_b32 s3, s3, s7 +; GCN-NEXT: s_or_b64 s[8:9], s[12:13], s[14:15] +; GCN-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 +; GCN-NEXT: v_mov_b32_e32 v2, s18 +; GCN-NEXT: s_and_b64 s[10:11], s[8:9], exec +; GCN-NEXT: s_cselect_b32 s3, s5, s3 ; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s0, s3, 0 -; GCN-NEXT: s_cselect_b32 s1, s2, 0 -; GCN-NEXT: v_mov_b32_e32 v0, s10 -; GCN-NEXT: v_mov_b32_e32 v1, s13 -; GCN-NEXT: v_mov_b32_e32 v2, s1 -; GCN-NEXT: v_mov_b32_e32 v3, s0 +; GCN-NEXT: s_cselect_b32 s2, s2, s6 +; GCN-NEXT: s_and_b64 s[0:1], s[8:9], exec +; GCN-NEXT: s_cselect_b32 s0, s4, s2 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_mov_b32_e32 v3, s17 ; GCN-NEXT: flat_store_dwordx4 v[6:7], v[0:3] ; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mov_b32_e32 v0, s22 -; GCN-NEXT: 
v_mov_b32_e32 v1, s9 -; GCN-NEXT: v_mov_b32_e32 v2, s5 -; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_mov_b32_e32 v0, s16 +; GCN-NEXT: v_mov_b32_e32 v1, s19 +; GCN-NEXT: v_mov_b32_e32 v2, s24 +; GCN-NEXT: v_mov_b32_e32 v3, s23 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm %shift = lshr <2 x i128> %lhs, %rhs @@ -585,61 +587,61 @@ ; GCN-NEXT: v_cmp_lt_u64_e64 s[16:17], s[8:9], 64 ; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[10:11], 0 ; GCN-NEXT: s_sub_i32 s22, 64, s8 -; GCN-NEXT: s_sub_i32 s20, s8, 64 -; GCN-NEXT: s_lshl_b64 s[22:23], s[2:3], s22 +; GCN-NEXT: s_ashr_i32 s23, s3, 31 +; GCN-NEXT: s_ashr_i64 s[20:21], s[2:3], s22 ; GCN-NEXT: s_and_b64 s[16:17], s[18:19], s[16:17] -; GCN-NEXT: s_lshr_b64 s[18:19], s[0:1], s8 -; GCN-NEXT: s_ashr_i64 s[20:21], s[2:3], s20 -; GCN-NEXT: s_or_b64 s[18:19], s[18:19], s[22:23] -; GCN-NEXT: s_and_b64 s[22:23], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s19, s19, s21 -; GCN-NEXT: s_or_b64 s[10:11], s[8:9], s[10:11] -; GCN-NEXT: v_cmp_eq_u64_e64 s[10:11], s[10:11], 0 -; GCN-NEXT: s_and_b64 s[22:23], s[10:11], exec -; GCN-NEXT: s_cselect_b32 s9, s1, s19 -; GCN-NEXT: s_and_b64 s[22:23], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s1, s18, s20 -; GCN-NEXT: s_and_b64 s[10:11], s[10:11], exec -; GCN-NEXT: v_cmp_lt_u64_e64 s[10:11], s[12:13], 64 -; GCN-NEXT: v_cmp_eq_u64_e64 s[18:19], s[14:15], 0 -; GCN-NEXT: s_cselect_b32 s22, s0, s1 -; GCN-NEXT: s_and_b64 s[0:1], s[18:19], s[10:11] -; GCN-NEXT: s_sub_i32 s18, 64, s12 -; GCN-NEXT: s_sub_i32 s10, s12, 64 -; GCN-NEXT: s_lshl_b64 s[18:19], s[6:7], s18 -; GCN-NEXT: s_lshr_b64 s[20:21], s[4:5], s12 -; GCN-NEXT: s_ashr_i64 s[10:11], s[6:7], s10 +; GCN-NEXT: s_and_b64 s[18:19], s[16:17], exec +; GCN-NEXT: s_cselect_b32 s24, s21, s23 +; GCN-NEXT: s_cselect_b32 s23, s20, s23 +; GCN-NEXT: s_lshl_b64 s[18:19], s[2:3], s22 +; GCN-NEXT: s_lshr_b64 s[20:21], s[0:1], s22 ; GCN-NEXT: s_or_b64 s[18:19], s[20:21], s[18:19] -; GCN-NEXT: s_and_b64 s[20:21], s[0:1], exec -; GCN-NEXT: 
s_cselect_b32 s11, s19, s11 -; GCN-NEXT: s_or_b64 s[14:15], s[12:13], s[14:15] -; GCN-NEXT: v_cmp_eq_u64_e64 s[14:15], s[14:15], 0 -; GCN-NEXT: s_and_b64 s[20:21], s[14:15], exec -; GCN-NEXT: s_cselect_b32 s13, s5, s11 -; GCN-NEXT: s_and_b64 s[20:21], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s5, s18, s10 -; GCN-NEXT: s_and_b64 s[10:11], s[14:15], exec -; GCN-NEXT: s_cselect_b32 s10, s4, s5 -; GCN-NEXT: s_ashr_i32 s11, s3, 31 -; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], s8 -; GCN-NEXT: s_and_b64 s[4:5], s[16:17], exec -; GCN-NEXT: s_cselect_b32 s4, s3, s11 -; GCN-NEXT: s_cselect_b32 s5, s2, s11 -; GCN-NEXT: s_ashr_i32 s8, s7, 31 -; GCN-NEXT: s_ashr_i64 s[2:3], s[6:7], s12 +; GCN-NEXT: s_sub_i32 s20, s8, 64 +; GCN-NEXT: s_ashr_i64 s[2:3], s[2:3], s20 +; GCN-NEXT: s_and_b64 s[20:21], s[16:17], exec +; GCN-NEXT: s_cselect_b32 s3, s19, s3 +; GCN-NEXT: s_or_b64 s[8:9], s[8:9], s[10:11] +; GCN-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 +; GCN-NEXT: s_and_b64 s[10:11], s[8:9], exec +; GCN-NEXT: s_cselect_b32 s19, s1, s3 +; GCN-NEXT: s_and_b64 s[10:11], s[16:17], exec +; GCN-NEXT: s_cselect_b32 s1, s18, s2 +; GCN-NEXT: s_and_b64 s[2:3], s[8:9], exec +; GCN-NEXT: v_cmp_lt_u64_e64 s[2:3], s[12:13], 64 +; GCN-NEXT: v_cmp_eq_u64_e64 s[8:9], s[14:15], 0 +; GCN-NEXT: s_cselect_b32 s16, s0, s1 +; GCN-NEXT: s_sub_i32 s10, 64, s12 +; GCN-NEXT: s_and_b64 s[0:1], s[8:9], s[2:3] +; GCN-NEXT: s_ashr_i64 s[2:3], s[6:7], s10 +; GCN-NEXT: s_ashr_i32 s11, s7, 31 +; GCN-NEXT: s_and_b64 s[8:9], s[0:1], exec +; GCN-NEXT: s_cselect_b32 s17, s3, s11 +; GCN-NEXT: s_cselect_b32 s18, s2, s11 +; GCN-NEXT: s_lshl_b64 s[2:3], s[6:7], s10 +; GCN-NEXT: s_lshr_b64 s[8:9], s[4:5], s10 +; GCN-NEXT: s_or_b64 s[2:3], s[8:9], s[2:3] +; GCN-NEXT: s_sub_i32 s8, s12, 64 +; GCN-NEXT: s_ashr_i64 s[6:7], s[6:7], s8 +; GCN-NEXT: s_and_b64 s[8:9], s[0:1], exec +; GCN-NEXT: s_cselect_b32 s3, s3, s7 +; GCN-NEXT: s_or_b64 s[8:9], s[12:13], s[14:15] +; GCN-NEXT: v_cmp_eq_u64_e64 s[8:9], s[8:9], 0 +; GCN-NEXT: v_mov_b32_e32 v2, 
s18 +; GCN-NEXT: s_and_b64 s[10:11], s[8:9], exec +; GCN-NEXT: s_cselect_b32 s3, s5, s3 ; GCN-NEXT: s_and_b64 s[0:1], s[0:1], exec -; GCN-NEXT: s_cselect_b32 s0, s3, s8 -; GCN-NEXT: s_cselect_b32 s1, s2, s8 -; GCN-NEXT: v_mov_b32_e32 v0, s10 -; GCN-NEXT: v_mov_b32_e32 v1, s13 -; GCN-NEXT: v_mov_b32_e32 v2, s1 -; GCN-NEXT: v_mov_b32_e32 v3, s0 +; GCN-NEXT: s_cselect_b32 s2, s2, s6 +; GCN-NEXT: s_and_b64 s[0:1], s[8:9], exec +; GCN-NEXT: s_cselect_b32 s0, s4, s2 +; GCN-NEXT: v_mov_b32_e32 v0, s0 +; GCN-NEXT: v_mov_b32_e32 v1, s3 +; GCN-NEXT: v_mov_b32_e32 v3, s17 ; GCN-NEXT: flat_store_dwordx4 v[6:7], v[0:3] ; GCN-NEXT: s_nop 0 -; GCN-NEXT: v_mov_b32_e32 v0, s22 -; GCN-NEXT: v_mov_b32_e32 v1, s9 -; GCN-NEXT: v_mov_b32_e32 v2, s5 -; GCN-NEXT: v_mov_b32_e32 v3, s4 +; GCN-NEXT: v_mov_b32_e32 v0, s16 +; GCN-NEXT: v_mov_b32_e32 v1, s19 +; GCN-NEXT: v_mov_b32_e32 v2, s23 +; GCN-NEXT: v_mov_b32_e32 v3, s24 ; GCN-NEXT: flat_store_dwordx4 v[4:5], v[0:3] ; GCN-NEXT: s_endpgm %shift = ashr <2 x i128> %lhs, %rhs diff --git a/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll b/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll --- a/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll @@ -396,106 +396,108 @@ define signext i128 @ashr_i128(i128 signext %a, i128 signext %b) { ; MIPS-LABEL: ashr_i128: ; MIPS: # %bb.0: # %entry -; MIPS-NEXT: lw $2, 28($sp) +; MIPS-NEXT: lw $10, 28($sp) ; MIPS-NEXT: addiu $1, $zero, 64 -; MIPS-NEXT: subu $9, $1, $2 -; MIPS-NEXT: sllv $10, $5, $9 -; MIPS-NEXT: andi $13, $9, 32 -; MIPS-NEXT: andi $3, $2, 32 -; MIPS-NEXT: addiu $11, $zero, 0 -; MIPS-NEXT: bnez $13, $BB5_2 -; MIPS-NEXT: addiu $12, $zero, 0 +; MIPS-NEXT: subu $1, $1, $10 +; MIPS-NEXT: andi $2, $1, 63 +; MIPS-NEXT: not $3, $2 +; MIPS-NEXT: srlv $9, $6, $2 +; MIPS-NEXT: andi $8, $1, 32 +; MIPS-NEXT: bnez $8, $BB5_3 +; MIPS-NEXT: addiu $15, $zero, 0 ; MIPS-NEXT: # %bb.1: # %entry -; MIPS-NEXT: move $12, $10 -; MIPS-NEXT: $BB5_2: # %entry -; MIPS-NEXT: not $8, $2 -; MIPS-NEXT: bnez $3, 
$BB5_5 -; MIPS-NEXT: srlv $14, $6, $2 -; MIPS-NEXT: # %bb.3: # %entry -; MIPS-NEXT: sll $1, $6, 1 -; MIPS-NEXT: srlv $11, $7, $2 -; MIPS-NEXT: sllv $1, $1, $8 -; MIPS-NEXT: or $15, $1, $11 -; MIPS-NEXT: bnez $13, $BB5_7 -; MIPS-NEXT: move $11, $14 -; MIPS-NEXT: # %bb.4: # %entry -; MIPS-NEXT: b $BB5_6 +; MIPS-NEXT: srlv $1, $7, $2 +; MIPS-NEXT: sll $11, $6, 1 +; MIPS-NEXT: sllv $11, $11, $3 +; MIPS-NEXT: or $gp, $11, $1 +; MIPS-NEXT: move $13, $9 +; MIPS-NEXT: bnez $8, $BB5_5 +; MIPS-NEXT: sllv $14, $5, $2 +; MIPS-NEXT: # %bb.2: # %entry +; MIPS-NEXT: b $BB5_4 ; MIPS-NEXT: nop -; MIPS-NEXT: $BB5_5: -; MIPS-NEXT: bnez $13, $BB5_7 +; MIPS-NEXT: $BB5_3: +; MIPS-NEXT: addiu $13, $zero, 0 +; MIPS-NEXT: move $gp, $9 +; MIPS-NEXT: bnez $8, $BB5_5 +; MIPS-NEXT: sllv $14, $5, $2 +; MIPS-NEXT: $BB5_4: # %entry ; MIPS-NEXT: move $15, $14 -; MIPS-NEXT: $BB5_6: # %entry -; MIPS-NEXT: sllv $1, $4, $9 -; MIPS-NEXT: not $9, $9 -; MIPS-NEXT: srl $10, $5, 1 -; MIPS-NEXT: srlv $9, $10, $9 -; MIPS-NEXT: or $10, $1, $9 -; MIPS-NEXT: $BB5_7: # %entry -; MIPS-NEXT: addiu $24, $2, -64 -; MIPS-NEXT: sll $13, $4, 1 -; MIPS-NEXT: srav $14, $4, $24 -; MIPS-NEXT: andi $1, $24, 32 -; MIPS-NEXT: bnez $1, $BB5_10 +; MIPS-NEXT: $BB5_5: # %entry +; MIPS-NEXT: addiu $1, $10, -64 +; MIPS-NEXT: andi $24, $1, 63 +; MIPS-NEXT: sll $11, $4, 1 +; MIPS-NEXT: srav $12, $4, $24 +; MIPS-NEXT: andi $1, $1, 32 +; MIPS-NEXT: bnez $1, $BB5_8 ; MIPS-NEXT: sra $9, $4, 31 -; MIPS-NEXT: # %bb.8: # %entry +; MIPS-NEXT: # %bb.6: # %entry ; MIPS-NEXT: srlv $1, $5, $24 ; MIPS-NEXT: not $24, $24 -; MIPS-NEXT: sllv $24, $13, $24 -; MIPS-NEXT: or $25, $24, $1 -; MIPS-NEXT: move $24, $14 -; MIPS-NEXT: sltiu $14, $2, 64 -; MIPS-NEXT: beqz $14, $BB5_12 +; MIPS-NEXT: sllv $24, $11, $24 +; MIPS-NEXT: move $25, $12 +; MIPS-NEXT: sltiu $12, $10, 64 +; MIPS-NEXT: beqz $12, $BB5_10 +; MIPS-NEXT: or $24, $24, $1 +; MIPS-NEXT: # %bb.7: # %entry +; MIPS-NEXT: b $BB5_9 ; MIPS-NEXT: nop -; MIPS-NEXT: # %bb.9: # %entry -; MIPS-NEXT: b 
$BB5_11 +; MIPS-NEXT: $BB5_8: +; MIPS-NEXT: move $24, $12 +; MIPS-NEXT: sltiu $12, $10, 64 +; MIPS-NEXT: beqz $12, $BB5_10 +; MIPS-NEXT: move $25, $9 +; MIPS-NEXT: $BB5_9: +; MIPS-NEXT: or $24, $gp, $15 +; MIPS-NEXT: $BB5_10: # %entry +; MIPS-NEXT: beqz $8, $BB5_17 ; MIPS-NEXT: nop -; MIPS-NEXT: $BB5_10: -; MIPS-NEXT: move $25, $14 -; MIPS-NEXT: sltiu $14, $2, 64 -; MIPS-NEXT: beqz $14, $BB5_12 -; MIPS-NEXT: move $24, $9 -; MIPS-NEXT: $BB5_11: -; MIPS-NEXT: or $25, $15, $12 +; MIPS-NEXT: # %bb.11: # %entry +; MIPS-NEXT: bnez $12, $BB5_18 +; MIPS-NEXT: sltiu $10, $10, 1 ; MIPS-NEXT: $BB5_12: # %entry -; MIPS-NEXT: sltiu $12, $2, 1 -; MIPS-NEXT: beqz $12, $BB5_18 +; MIPS-NEXT: beqz $10, $BB5_19 ; MIPS-NEXT: nop -; MIPS-NEXT: # %bb.13: # %entry -; MIPS-NEXT: bnez $14, $BB5_19 +; MIPS-NEXT: $BB5_13: # %entry +; MIPS-NEXT: beqz $10, $BB5_20 ; MIPS-NEXT: nop ; MIPS-NEXT: $BB5_14: # %entry -; MIPS-NEXT: beqz $12, $BB5_20 -; MIPS-NEXT: nop -; MIPS-NEXT: $BB5_15: # %entry -; MIPS-NEXT: bnez $3, $BB5_21 +; MIPS-NEXT: bnez $8, $BB5_21 ; MIPS-NEXT: srav $4, $4, $2 -; MIPS-NEXT: $BB5_16: # %entry +; MIPS-NEXT: $BB5_15: # %entry ; MIPS-NEXT: srlv $1, $5, $2 -; MIPS-NEXT: sllv $2, $13, $8 +; MIPS-NEXT: sllv $2, $11, $3 ; MIPS-NEXT: or $3, $2, $1 -; MIPS-NEXT: bnez $14, $BB5_23 +; MIPS-NEXT: bnez $12, $BB5_23 ; MIPS-NEXT: move $2, $4 -; MIPS-NEXT: # %bb.17: # %entry +; MIPS-NEXT: # %bb.16: # %entry ; MIPS-NEXT: b $BB5_22 ; MIPS-NEXT: nop -; MIPS-NEXT: $BB5_18: # %entry -; MIPS-NEXT: beqz $14, $BB5_14 -; MIPS-NEXT: move $7, $25 -; MIPS-NEXT: $BB5_19: -; MIPS-NEXT: bnez $12, $BB5_15 -; MIPS-NEXT: or $24, $11, $10 +; MIPS-NEXT: $BB5_17: # %entry +; MIPS-NEXT: sllv $1, $4, $2 +; MIPS-NEXT: srl $14, $5, 1 +; MIPS-NEXT: srlv $14, $14, $3 +; MIPS-NEXT: or $14, $1, $14 +; MIPS-NEXT: beqz $12, $BB5_12 +; MIPS-NEXT: sltiu $10, $10, 1 +; MIPS-NEXT: $BB5_18: +; MIPS-NEXT: bnez $10, $BB5_13 +; MIPS-NEXT: or $25, $13, $14 +; MIPS-NEXT: $BB5_19: # %entry +; MIPS-NEXT: bnez $10, $BB5_14 +; 
MIPS-NEXT: move $6, $25 ; MIPS-NEXT: $BB5_20: # %entry -; MIPS-NEXT: move $6, $24 -; MIPS-NEXT: beqz $3, $BB5_16 +; MIPS-NEXT: move $7, $24 +; MIPS-NEXT: beqz $8, $BB5_15 ; MIPS-NEXT: srav $4, $4, $2 ; MIPS-NEXT: $BB5_21: ; MIPS-NEXT: move $2, $9 -; MIPS-NEXT: bnez $14, $BB5_23 +; MIPS-NEXT: bnez $12, $BB5_23 ; MIPS-NEXT: move $3, $4 ; MIPS-NEXT: $BB5_22: # %entry ; MIPS-NEXT: move $2, $9 ; MIPS-NEXT: $BB5_23: # %entry -; MIPS-NEXT: bnez $14, $BB5_25 +; MIPS-NEXT: bnez $12, $BB5_25 ; MIPS-NEXT: nop ; MIPS-NEXT: # %bb.24: # %entry ; MIPS-NEXT: move $3, $9 @@ -506,188 +508,204 @@ ; ; MIPS32-LABEL: ashr_i128: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $9, 28($sp) -; MIPS32-NEXT: srlv $1, $7, $9 -; MIPS32-NEXT: not $2, $9 -; MIPS32-NEXT: sll $3, $6, 1 -; MIPS32-NEXT: sllv $3, $3, $2 -; MIPS32-NEXT: addiu $8, $zero, 64 -; MIPS32-NEXT: or $1, $3, $1 -; MIPS32-NEXT: srlv $10, $6, $9 -; MIPS32-NEXT: subu $3, $8, $9 -; MIPS32-NEXT: sllv $11, $5, $3 -; MIPS32-NEXT: andi $12, $3, 32 -; MIPS32-NEXT: andi $13, $9, 32 -; MIPS32-NEXT: move $8, $11 -; MIPS32-NEXT: movn $8, $zero, $12 -; MIPS32-NEXT: movn $1, $10, $13 -; MIPS32-NEXT: addiu $14, $9, -64 -; MIPS32-NEXT: srlv $15, $5, $14 +; MIPS32-NEXT: lw $2, 28($sp) +; MIPS32-NEXT: addiu $1, $zero, 64 +; MIPS32-NEXT: subu $1, $1, $2 +; MIPS32-NEXT: andi $9, $1, 63 +; MIPS32-NEXT: sllv $3, $5, $9 +; MIPS32-NEXT: andi $10, $1, 32 +; MIPS32-NEXT: move $1, $3 +; MIPS32-NEXT: movn $1, $zero, $10 +; MIPS32-NEXT: srlv $8, $7, $9 +; MIPS32-NEXT: not $11, $9 +; MIPS32-NEXT: sll $12, $6, 1 +; MIPS32-NEXT: sllv $12, $12, $11 +; MIPS32-NEXT: or $8, $12, $8 +; MIPS32-NEXT: srlv $12, $6, $9 +; MIPS32-NEXT: movn $8, $12, $10 +; MIPS32-NEXT: addiu $13, $2, -64 +; MIPS32-NEXT: andi $14, $13, 63 +; MIPS32-NEXT: or $15, $8, $1 +; MIPS32-NEXT: srlv $1, $5, $14 ; MIPS32-NEXT: sll $24, $4, 1 -; MIPS32-NEXT: not $25, $14 -; MIPS32-NEXT: sllv $25, $24, $25 -; MIPS32-NEXT: or $gp, $1, $8 -; MIPS32-NEXT: or $1, $25, $15 +; MIPS32-NEXT: not $8, $14 +; 
MIPS32-NEXT: sllv $8, $24, $8 +; MIPS32-NEXT: or $1, $8, $1 ; MIPS32-NEXT: srav $8, $4, $14 -; MIPS32-NEXT: andi $14, $14, 32 -; MIPS32-NEXT: movn $1, $8, $14 -; MIPS32-NEXT: sllv $15, $4, $3 -; MIPS32-NEXT: not $3, $3 +; MIPS32-NEXT: andi $13, $13, 32 +; MIPS32-NEXT: movn $1, $8, $13 +; MIPS32-NEXT: sltiu $14, $2, 64 +; MIPS32-NEXT: movn $1, $15, $14 +; MIPS32-NEXT: movn $12, $zero, $10 +; MIPS32-NEXT: sllv $15, $4, $9 ; MIPS32-NEXT: srl $25, $5, 1 -; MIPS32-NEXT: srlv $3, $25, $3 -; MIPS32-NEXT: sltiu $25, $9, 64 -; MIPS32-NEXT: movn $1, $gp, $25 -; MIPS32-NEXT: or $15, $15, $3 -; MIPS32-NEXT: srlv $3, $5, $9 -; MIPS32-NEXT: sllv $2, $24, $2 -; MIPS32-NEXT: or $5, $2, $3 -; MIPS32-NEXT: srav $24, $4, $9 -; MIPS32-NEXT: movn $5, $24, $13 -; MIPS32-NEXT: sra $2, $4, 31 -; MIPS32-NEXT: movz $1, $7, $9 -; MIPS32-NEXT: move $3, $2 -; MIPS32-NEXT: movn $3, $5, $25 -; MIPS32-NEXT: movn $15, $11, $12 -; MIPS32-NEXT: movn $10, $zero, $13 -; MIPS32-NEXT: or $4, $10, $15 -; MIPS32-NEXT: movn $8, $2, $14 -; MIPS32-NEXT: movn $8, $4, $25 -; MIPS32-NEXT: movz $8, $6, $9 -; MIPS32-NEXT: movn $24, $2, $13 -; MIPS32-NEXT: movn $2, $24, $25 +; MIPS32-NEXT: srlv $25, $25, $11 +; MIPS32-NEXT: or $15, $15, $25 +; MIPS32-NEXT: movn $15, $3, $10 +; MIPS32-NEXT: or $12, $12, $15 +; MIPS32-NEXT: sra $3, $4, 31 +; MIPS32-NEXT: movn $8, $3, $13 +; MIPS32-NEXT: movn $8, $12, $14 +; MIPS32-NEXT: srav $4, $4, $9 +; MIPS32-NEXT: movz $8, $6, $2 +; MIPS32-NEXT: movz $1, $7, $2 +; MIPS32-NEXT: move $6, $4 +; MIPS32-NEXT: movn $6, $3, $10 +; MIPS32-NEXT: move $2, $3 +; MIPS32-NEXT: movn $2, $6, $14 +; MIPS32-NEXT: srlv $5, $5, $9 +; MIPS32-NEXT: sllv $6, $24, $11 +; MIPS32-NEXT: or $5, $6, $5 +; MIPS32-NEXT: movn $5, $4, $10 +; MIPS32-NEXT: movn $3, $5, $14 ; MIPS32-NEXT: move $4, $8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: move $5, $1 ; ; 32R2-LABEL: ashr_i128: ; 32R2: # %bb.0: # %entry -; 32R2-NEXT: lw $9, 28($sp) -; 32R2-NEXT: srlv $1, $7, $9 -; 32R2-NEXT: not $2, $9 -; 32R2-NEXT: sll $3, $6, 1 -; 
32R2-NEXT: sllv $3, $3, $2 -; 32R2-NEXT: addiu $8, $zero, 64 -; 32R2-NEXT: or $1, $3, $1 -; 32R2-NEXT: srlv $10, $6, $9 -; 32R2-NEXT: subu $3, $8, $9 -; 32R2-NEXT: sllv $11, $5, $3 -; 32R2-NEXT: andi $12, $3, 32 -; 32R2-NEXT: andi $13, $9, 32 -; 32R2-NEXT: move $8, $11 -; 32R2-NEXT: movn $8, $zero, $12 -; 32R2-NEXT: movn $1, $10, $13 -; 32R2-NEXT: addiu $14, $9, -64 -; 32R2-NEXT: srlv $15, $5, $14 +; 32R2-NEXT: lw $2, 28($sp) +; 32R2-NEXT: addiu $1, $zero, 64 +; 32R2-NEXT: subu $1, $1, $2 +; 32R2-NEXT: andi $9, $1, 63 +; 32R2-NEXT: sllv $3, $5, $9 +; 32R2-NEXT: andi $10, $1, 32 +; 32R2-NEXT: move $1, $3 +; 32R2-NEXT: movn $1, $zero, $10 +; 32R2-NEXT: srlv $8, $7, $9 +; 32R2-NEXT: not $11, $9 +; 32R2-NEXT: sll $12, $6, 1 +; 32R2-NEXT: sllv $12, $12, $11 +; 32R2-NEXT: or $8, $12, $8 +; 32R2-NEXT: srlv $12, $6, $9 +; 32R2-NEXT: movn $8, $12, $10 +; 32R2-NEXT: addiu $13, $2, -64 +; 32R2-NEXT: andi $14, $13, 63 +; 32R2-NEXT: or $15, $8, $1 +; 32R2-NEXT: srlv $1, $5, $14 ; 32R2-NEXT: sll $24, $4, 1 -; 32R2-NEXT: not $25, $14 -; 32R2-NEXT: sllv $25, $24, $25 -; 32R2-NEXT: or $gp, $1, $8 -; 32R2-NEXT: or $1, $25, $15 +; 32R2-NEXT: not $8, $14 +; 32R2-NEXT: sllv $8, $24, $8 +; 32R2-NEXT: or $1, $8, $1 ; 32R2-NEXT: srav $8, $4, $14 -; 32R2-NEXT: andi $14, $14, 32 -; 32R2-NEXT: movn $1, $8, $14 -; 32R2-NEXT: sllv $15, $4, $3 -; 32R2-NEXT: not $3, $3 +; 32R2-NEXT: andi $13, $13, 32 +; 32R2-NEXT: movn $1, $8, $13 +; 32R2-NEXT: sltiu $14, $2, 64 +; 32R2-NEXT: movn $1, $15, $14 +; 32R2-NEXT: movn $12, $zero, $10 +; 32R2-NEXT: sllv $15, $4, $9 ; 32R2-NEXT: srl $25, $5, 1 -; 32R2-NEXT: srlv $3, $25, $3 -; 32R2-NEXT: sltiu $25, $9, 64 -; 32R2-NEXT: movn $1, $gp, $25 -; 32R2-NEXT: or $15, $15, $3 -; 32R2-NEXT: srlv $3, $5, $9 -; 32R2-NEXT: sllv $2, $24, $2 -; 32R2-NEXT: or $5, $2, $3 -; 32R2-NEXT: srav $24, $4, $9 -; 32R2-NEXT: movn $5, $24, $13 -; 32R2-NEXT: sra $2, $4, 31 -; 32R2-NEXT: movz $1, $7, $9 -; 32R2-NEXT: move $3, $2 -; 32R2-NEXT: movn $3, $5, $25 -; 32R2-NEXT: movn $15, 
$11, $12 -; 32R2-NEXT: movn $10, $zero, $13 -; 32R2-NEXT: or $4, $10, $15 -; 32R2-NEXT: movn $8, $2, $14 -; 32R2-NEXT: movn $8, $4, $25 -; 32R2-NEXT: movz $8, $6, $9 -; 32R2-NEXT: movn $24, $2, $13 -; 32R2-NEXT: movn $2, $24, $25 +; 32R2-NEXT: srlv $25, $25, $11 +; 32R2-NEXT: or $15, $15, $25 +; 32R2-NEXT: movn $15, $3, $10 +; 32R2-NEXT: or $12, $12, $15 +; 32R2-NEXT: sra $3, $4, 31 +; 32R2-NEXT: movn $8, $3, $13 +; 32R2-NEXT: movn $8, $12, $14 +; 32R2-NEXT: srav $4, $4, $9 +; 32R2-NEXT: movz $8, $6, $2 +; 32R2-NEXT: movz $1, $7, $2 +; 32R2-NEXT: move $6, $4 +; 32R2-NEXT: movn $6, $3, $10 +; 32R2-NEXT: move $2, $3 +; 32R2-NEXT: movn $2, $6, $14 +; 32R2-NEXT: srlv $5, $5, $9 +; 32R2-NEXT: sllv $6, $24, $11 +; 32R2-NEXT: or $5, $6, $5 +; 32R2-NEXT: movn $5, $4, $10 +; 32R2-NEXT: movn $3, $5, $14 ; 32R2-NEXT: move $4, $8 ; 32R2-NEXT: jr $ra ; 32R2-NEXT: move $5, $1 ; ; 32R6-LABEL: ashr_i128: ; 32R6: # %bb.0: # %entry -; 32R6-NEXT: lw $3, 28($sp) -; 32R6-NEXT: addiu $1, $zero, 64 -; 32R6-NEXT: subu $1, $1, $3 -; 32R6-NEXT: sllv $2, $5, $1 -; 32R6-NEXT: andi $8, $1, 32 -; 32R6-NEXT: selnez $9, $2, $8 -; 32R6-NEXT: sllv $10, $4, $1 -; 32R6-NEXT: not $1, $1 -; 32R6-NEXT: srl $11, $5, 1 -; 32R6-NEXT: srlv $1, $11, $1 -; 32R6-NEXT: or $1, $10, $1 -; 32R6-NEXT: seleqz $1, $1, $8 -; 32R6-NEXT: or $1, $9, $1 -; 32R6-NEXT: srlv $9, $7, $3 -; 32R6-NEXT: not $10, $3 -; 32R6-NEXT: sll $11, $6, 1 -; 32R6-NEXT: sllv $11, $11, $10 -; 32R6-NEXT: or $9, $11, $9 -; 32R6-NEXT: andi $11, $3, 32 -; 32R6-NEXT: seleqz $9, $9, $11 -; 32R6-NEXT: srlv $12, $6, $3 -; 32R6-NEXT: selnez $13, $12, $11 -; 32R6-NEXT: seleqz $12, $12, $11 -; 32R6-NEXT: or $1, $12, $1 -; 32R6-NEXT: seleqz $2, $2, $8 -; 32R6-NEXT: or $8, $13, $9 -; 32R6-NEXT: addiu $9, $3, -64 -; 32R6-NEXT: srlv $12, $5, $9 -; 32R6-NEXT: sll $13, $4, 1 -; 32R6-NEXT: not $14, $9 -; 32R6-NEXT: sllv $14, $13, $14 -; 32R6-NEXT: sltiu $15, $3, 64 -; 32R6-NEXT: or $2, $8, $2 -; 32R6-NEXT: selnez $1, $1, $15 -; 32R6-NEXT: or $8, $14, $12 -; 
32R6-NEXT: srav $12, $4, $9 -; 32R6-NEXT: andi $9, $9, 32 -; 32R6-NEXT: seleqz $14, $12, $9 -; 32R6-NEXT: sra $24, $4, 31 -; 32R6-NEXT: selnez $25, $24, $9 -; 32R6-NEXT: seleqz $8, $8, $9 -; 32R6-NEXT: or $14, $25, $14 -; 32R6-NEXT: seleqz $14, $14, $15 -; 32R6-NEXT: selnez $9, $12, $9 -; 32R6-NEXT: seleqz $12, $24, $15 -; 32R6-NEXT: or $1, $1, $14 -; 32R6-NEXT: selnez $14, $1, $3 -; 32R6-NEXT: selnez $1, $2, $15 -; 32R6-NEXT: or $2, $9, $8 -; 32R6-NEXT: srav $8, $4, $3 -; 32R6-NEXT: seleqz $4, $8, $11 -; 32R6-NEXT: selnez $9, $24, $11 +; 32R6-NEXT: addiu $sp, $sp, -16 +; 32R6-NEXT: .cfi_def_cfa_offset 16 +; 32R6-NEXT: sw $19, 12($sp) # 4-byte Folded Spill +; 32R6-NEXT: sw $18, 8($sp) # 4-byte Folded Spill +; 32R6-NEXT: sw $17, 4($sp) # 4-byte Folded Spill +; 32R6-NEXT: sw $16, 0($sp) # 4-byte Folded Spill +; 32R6-NEXT: .cfi_offset 19, -4 +; 32R6-NEXT: .cfi_offset 18, -8 +; 32R6-NEXT: .cfi_offset 17, -12 +; 32R6-NEXT: .cfi_offset 16, -16 +; 32R6-NEXT: lw $1, 44($sp) +; 32R6-NEXT: addiu $2, $zero, 64 +; 32R6-NEXT: subu $2, $2, $1 +; 32R6-NEXT: andi $3, $2, 63 +; 32R6-NEXT: sllv $8, $4, $3 +; 32R6-NEXT: not $9, $3 +; 32R6-NEXT: srl $10, $5, 1 +; 32R6-NEXT: srlv $10, $10, $9 +; 32R6-NEXT: or $8, $8, $10 +; 32R6-NEXT: srav $10, $4, $3 +; 32R6-NEXT: sllv $11, $5, $3 +; 32R6-NEXT: andi $2, $2, 32 +; 32R6-NEXT: selnez $12, $11, $2 +; 32R6-NEXT: seleqz $8, $8, $2 +; 32R6-NEXT: selnez $13, $10, $2 +; 32R6-NEXT: srlv $14, $5, $3 +; 32R6-NEXT: sll $15, $4, 1 +; 32R6-NEXT: sllv $24, $15, $9 +; 32R6-NEXT: or $14, $24, $14 +; 32R6-NEXT: seleqz $14, $14, $2 +; 32R6-NEXT: or $13, $13, $14 +; 32R6-NEXT: srlv $14, $6, $3 +; 32R6-NEXT: addiu $24, $1, -64 +; 32R6-NEXT: sltiu $25, $1, 64 +; 32R6-NEXT: sra $gp, $4, 31 +; 32R6-NEXT: or $8, $12, $8 +; 32R6-NEXT: seleqz $12, $14, $2 +; 32R6-NEXT: seleqz $10, $10, $2 +; 32R6-NEXT: selnez $16, $gp, $2 +; 32R6-NEXT: seleqz $17, $gp, $25 +; 32R6-NEXT: andi $18, $24, 32 +; 32R6-NEXT: seleqz $19, $6, $1 +; 32R6-NEXT: selnez $13, $13, $25 +; 
32R6-NEXT: selnez $14, $14, $2 +; 32R6-NEXT: srlv $3, $7, $3 +; 32R6-NEXT: sll $6, $6, 1 +; 32R6-NEXT: sllv $6, $6, $9 +; 32R6-NEXT: or $3, $6, $3 +; 32R6-NEXT: seleqz $3, $3, $2 +; 32R6-NEXT: or $3, $14, $3 +; 32R6-NEXT: seleqz $2, $11, $2 +; 32R6-NEXT: or $2, $3, $2 +; 32R6-NEXT: or $3, $13, $17 +; 32R6-NEXT: or $6, $16, $10 +; 32R6-NEXT: seleqz $7, $7, $1 +; 32R6-NEXT: or $8, $12, $8 +; 32R6-NEXT: selnez $8, $8, $25 +; 32R6-NEXT: selnez $9, $gp, $18 +; 32R6-NEXT: andi $10, $24, 63 +; 32R6-NEXT: srav $11, $4, $10 +; 32R6-NEXT: seleqz $4, $11, $18 ; 32R6-NEXT: or $4, $9, $4 -; 32R6-NEXT: selnez $9, $4, $15 -; 32R6-NEXT: seleqz $2, $2, $15 -; 32R6-NEXT: seleqz $4, $6, $3 -; 32R6-NEXT: seleqz $6, $7, $3 -; 32R6-NEXT: or $1, $1, $2 -; 32R6-NEXT: selnez $1, $1, $3 -; 32R6-NEXT: or $1, $6, $1 -; 32R6-NEXT: or $4, $4, $14 -; 32R6-NEXT: or $2, $9, $12 -; 32R6-NEXT: srlv $3, $5, $3 -; 32R6-NEXT: sllv $5, $13, $10 -; 32R6-NEXT: or $3, $5, $3 -; 32R6-NEXT: seleqz $3, $3, $11 -; 32R6-NEXT: selnez $5, $8, $11 -; 32R6-NEXT: or $3, $5, $3 -; 32R6-NEXT: selnez $3, $3, $15 -; 32R6-NEXT: or $3, $3, $12 +; 32R6-NEXT: seleqz $4, $4, $25 +; 32R6-NEXT: or $4, $8, $4 +; 32R6-NEXT: selnez $4, $4, $1 +; 32R6-NEXT: or $4, $19, $4 +; 32R6-NEXT: selnez $2, $2, $25 +; 32R6-NEXT: srlv $5, $5, $10 +; 32R6-NEXT: not $8, $10 +; 32R6-NEXT: sllv $8, $15, $8 +; 32R6-NEXT: or $5, $8, $5 +; 32R6-NEXT: seleqz $5, $5, $18 +; 32R6-NEXT: selnez $8, $11, $18 +; 32R6-NEXT: or $5, $8, $5 +; 32R6-NEXT: seleqz $5, $5, $25 +; 32R6-NEXT: or $2, $2, $5 +; 32R6-NEXT: selnez $1, $2, $1 +; 32R6-NEXT: or $5, $7, $1 +; 32R6-NEXT: selnez $1, $6, $25 +; 32R6-NEXT: or $2, $1, $17 +; 32R6-NEXT: lw $16, 0($sp) # 4-byte Folded Reload +; 32R6-NEXT: lw $17, 4($sp) # 4-byte Folded Reload +; 32R6-NEXT: lw $18, 8($sp) # 4-byte Folded Reload +; 32R6-NEXT: lw $19, 12($sp) # 4-byte Folded Reload ; 32R6-NEXT: jr $ra -; 32R6-NEXT: move $5, $1 +; 32R6-NEXT: addiu $sp, $sp, 16 ; ; MIPS3-LABEL: ashr_i128: ; MIPS3: # %bb.0: # %entry @@ 
-766,169 +784,182 @@ ; MMR3-NEXT: .cfi_offset 17, -4 ; MMR3-NEXT: .cfi_offset 16, -8 ; MMR3-NEXT: move $8, $7 -; MMR3-NEXT: sw $6, 32($sp) # 4-byte Folded Spill -; MMR3-NEXT: sw $5, 36($sp) # 4-byte Folded Spill -; MMR3-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MMR3-NEXT: lw $16, 76($sp) -; MMR3-NEXT: srlv $4, $7, $16 -; MMR3-NEXT: not16 $3, $16 -; MMR3-NEXT: sw $3, 24($sp) # 4-byte Folded Spill -; MMR3-NEXT: sll16 $2, $6, 1 -; MMR3-NEXT: sllv $3, $2, $3 +; MMR3-NEXT: sw $5, 32($sp) # 4-byte Folded Spill +; MMR3-NEXT: sw $4, 20($sp) # 4-byte Folded Spill +; MMR3-NEXT: lw $17, 76($sp) ; MMR3-NEXT: li16 $2, 64 -; MMR3-NEXT: or16 $3, $4 -; MMR3-NEXT: srlv $6, $6, $16 -; MMR3-NEXT: sw $6, 12($sp) # 4-byte Folded Spill -; MMR3-NEXT: subu16 $7, $2, $16 +; MMR3-NEXT: subu16 $2, $2, $17 +; MMR3-NEXT: andi16 $7, $2, 63 ; MMR3-NEXT: sllv $9, $5, $7 -; MMR3-NEXT: andi16 $2, $7, 32 -; MMR3-NEXT: sw $2, 28($sp) # 4-byte Folded Spill -; MMR3-NEXT: andi16 $5, $16, 32 -; MMR3-NEXT: sw $5, 16($sp) # 4-byte Folded Spill -; MMR3-NEXT: move $4, $9 -; MMR3-NEXT: li16 $17, 0 -; MMR3-NEXT: movn $4, $17, $2 -; MMR3-NEXT: movn $3, $6, $5 -; MMR3-NEXT: addiu $2, $16, -64 -; MMR3-NEXT: lw $5, 36($sp) # 4-byte Folded Reload -; MMR3-NEXT: srlv $5, $5, $2 -; MMR3-NEXT: sw $5, 20($sp) # 4-byte Folded Spill +; MMR3-NEXT: andi16 $4, $2, 32 +; MMR3-NEXT: sw $4, 8($sp) # 4-byte Folded Spill +; MMR3-NEXT: li16 $2, 0 +; MMR3-NEXT: move $16, $9 +; MMR3-NEXT: movn $16, $2, $4 +; MMR3-NEXT: srlv $3, $8, $7 +; MMR3-NEXT: not16 $5, $7 +; MMR3-NEXT: sw $5, 36($sp) # 4-byte Folded Spill +; MMR3-NEXT: sw $6, 28($sp) # 4-byte Folded Spill +; MMR3-NEXT: sll16 $2, $6, 1 +; MMR3-NEXT: sllv $2, $2, $5 +; MMR3-NEXT: or16 $2, $3 +; MMR3-NEXT: srlv $3, $6, $7 +; MMR3-NEXT: sw $3, 12($sp) # 4-byte Folded Spill +; MMR3-NEXT: movn $2, $3, $4 +; MMR3-NEXT: sw $17, 16($sp) # 4-byte Folded Spill +; MMR3-NEXT: addiu $3, $17, -64 +; MMR3-NEXT: sw $3, 4($sp) # 4-byte Folded Spill +; MMR3-NEXT: andi16 $3, $3, 63 +; 
MMR3-NEXT: or16 $2, $16 +; MMR3-NEXT: lw $5, 32($sp) # 4-byte Folded Reload +; MMR3-NEXT: srlv $4, $5, $3 +; MMR3-NEXT: sw $4, 0($sp) # 4-byte Folded Spill +; MMR3-NEXT: lw $6, 20($sp) # 4-byte Folded Reload +; MMR3-NEXT: sll16 $4, $6, 1 +; MMR3-NEXT: sw $4, 24($sp) # 4-byte Folded Spill +; MMR3-NEXT: not16 $16, $3 +; MMR3-NEXT: sllv $16, $4, $16 +; MMR3-NEXT: lw $4, 0($sp) # 4-byte Folded Reload +; MMR3-NEXT: or16 $16, $4 +; MMR3-NEXT: srav $1, $6, $3 +; MMR3-NEXT: lw $3, 4($sp) # 4-byte Folded Reload +; MMR3-NEXT: andi16 $3, $3, 32 +; MMR3-NEXT: sw $3, 4($sp) # 4-byte Folded Spill +; MMR3-NEXT: movn $16, $1, $3 +; MMR3-NEXT: sltiu $10, $17, 64 +; MMR3-NEXT: movn $16, $2, $10 ; MMR3-NEXT: lw $17, 8($sp) # 4-byte Folded Reload -; MMR3-NEXT: sll16 $6, $17, 1 -; MMR3-NEXT: sw $6, 4($sp) # 4-byte Folded Spill -; MMR3-NEXT: not16 $5, $2 -; MMR3-NEXT: sllv $5, $6, $5 -; MMR3-NEXT: or16 $3, $4 -; MMR3-NEXT: lw $4, 20($sp) # 4-byte Folded Reload -; MMR3-NEXT: or16 $5, $4 -; MMR3-NEXT: srav $1, $17, $2 -; MMR3-NEXT: andi16 $2, $2, 32 -; MMR3-NEXT: sw $2, 20($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $5, $1, $2 -; MMR3-NEXT: sllv $2, $17, $7 -; MMR3-NEXT: not16 $4, $7 -; MMR3-NEXT: lw $7, 36($sp) # 4-byte Folded Reload -; MMR3-NEXT: srl16 $6, $7, 1 -; MMR3-NEXT: srlv $6, $6, $4 -; MMR3-NEXT: sltiu $10, $16, 64 -; MMR3-NEXT: movn $5, $3, $10 -; MMR3-NEXT: or16 $6, $2 -; MMR3-NEXT: srlv $2, $7, $16 -; MMR3-NEXT: lw $3, 24($sp) # 4-byte Folded Reload +; MMR3-NEXT: lw $4, 12($sp) # 4-byte Folded Reload +; MMR3-NEXT: li16 $2, 0 +; MMR3-NEXT: movn $4, $2, $17 +; MMR3-NEXT: sllv $2, $6, $7 +; MMR3-NEXT: sw $2, 12($sp) # 4-byte Folded Spill +; MMR3-NEXT: srl16 $3, $5, 1 +; MMR3-NEXT: lw $2, 36($sp) # 4-byte Folded Reload +; MMR3-NEXT: srlv $2, $3, $2 +; MMR3-NEXT: lw $3, 12($sp) # 4-byte Folded Reload +; MMR3-NEXT: or16 $2, $3 +; MMR3-NEXT: movn $2, $9, $17 +; MMR3-NEXT: or16 $2, $4 +; MMR3-NEXT: sra $3, $6, 31 ; MMR3-NEXT: lw $4, 4($sp) # 4-byte Folded Reload -; MMR3-NEXT: sllv 
$3, $4, $3 -; MMR3-NEXT: or16 $3, $2 -; MMR3-NEXT: srav $11, $17, $16 -; MMR3-NEXT: lw $4, 16($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $3, $11, $4 -; MMR3-NEXT: sra $2, $17, 31 -; MMR3-NEXT: movz $5, $8, $16 -; MMR3-NEXT: move $8, $2 -; MMR3-NEXT: movn $8, $3, $10 -; MMR3-NEXT: lw $3, 28($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $6, $9, $3 -; MMR3-NEXT: li16 $3, 0 -; MMR3-NEXT: lw $7, 12($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $7, $3, $4 +; MMR3-NEXT: movn $1, $3, $4 +; MMR3-NEXT: movn $1, $2, $10 +; MMR3-NEXT: srav $4, $6, $7 +; MMR3-NEXT: lw $2, 16($sp) # 4-byte Folded Reload +; MMR3-NEXT: lw $5, 28($sp) # 4-byte Folded Reload +; MMR3-NEXT: movz $1, $5, $2 +; MMR3-NEXT: movz $16, $8, $2 +; MMR3-NEXT: move $6, $4 +; MMR3-NEXT: movn $6, $3, $17 +; MMR3-NEXT: move $2, $3 +; MMR3-NEXT: movn $2, $6, $10 +; MMR3-NEXT: lw $5, 32($sp) # 4-byte Folded Reload +; MMR3-NEXT: srlv $6, $5, $7 +; MMR3-NEXT: lw $5, 36($sp) # 4-byte Folded Reload +; MMR3-NEXT: lw $7, 24($sp) # 4-byte Folded Reload +; MMR3-NEXT: sllv $7, $7, $5 ; MMR3-NEXT: or16 $7, $6 -; MMR3-NEXT: lw $3, 20($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $1, $2, $3 -; MMR3-NEXT: movn $1, $7, $10 -; MMR3-NEXT: lw $3, 32($sp) # 4-byte Folded Reload -; MMR3-NEXT: movz $1, $3, $16 -; MMR3-NEXT: movn $11, $2, $4 -; MMR3-NEXT: movn $2, $11, $10 -; MMR3-NEXT: move $3, $8 +; MMR3-NEXT: movn $7, $4, $17 +; MMR3-NEXT: movn $3, $7, $10 ; MMR3-NEXT: move $4, $1 +; MMR3-NEXT: move $5, $16 ; MMR3-NEXT: lwp $16, 40($sp) ; MMR3-NEXT: addiusp 48 ; MMR3-NEXT: jrc $ra ; ; MMR6-LABEL: ashr_i128: ; MMR6: # %bb.0: # %entry -; MMR6-NEXT: addiu $sp, $sp, -16 -; MMR6-NEXT: .cfi_def_cfa_offset 16 -; MMR6-NEXT: sw $17, 12($sp) # 4-byte Folded Spill -; MMR6-NEXT: sw $16, 8($sp) # 4-byte Folded Spill +; MMR6-NEXT: addiu $sp, $sp, -24 +; MMR6-NEXT: .cfi_def_cfa_offset 24 +; MMR6-NEXT: sw $17, 20($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $16, 16($sp) # 4-byte Folded Spill ; MMR6-NEXT: .cfi_offset 17, -4 ; MMR6-NEXT: 
.cfi_offset 16, -8 ; MMR6-NEXT: move $1, $7 -; MMR6-NEXT: lw $3, 44($sp) +; MMR6-NEXT: move $17, $5 +; MMR6-NEXT: move $5, $4 +; MMR6-NEXT: lw $7, 52($sp) ; MMR6-NEXT: li16 $2, 64 -; MMR6-NEXT: subu16 $7, $2, $3 -; MMR6-NEXT: sllv $8, $5, $7 -; MMR6-NEXT: andi16 $2, $7, 32 -; MMR6-NEXT: selnez $9, $8, $2 -; MMR6-NEXT: sllv $10, $4, $7 -; MMR6-NEXT: not16 $7, $7 -; MMR6-NEXT: srl16 $16, $5, 1 -; MMR6-NEXT: srlv $7, $16, $7 -; MMR6-NEXT: or $7, $10, $7 -; MMR6-NEXT: seleqz $7, $7, $2 -; MMR6-NEXT: or $7, $9, $7 -; MMR6-NEXT: srlv $9, $1, $3 +; MMR6-NEXT: subu16 $2, $2, $7 +; MMR6-NEXT: andi16 $3, $2, 63 +; MMR6-NEXT: sllv $8, $4, $3 ; MMR6-NEXT: not16 $16, $3 -; MMR6-NEXT: sw $16, 4($sp) # 4-byte Folded Spill -; MMR6-NEXT: sll16 $17, $6, 1 -; MMR6-NEXT: sllv $10, $17, $16 -; MMR6-NEXT: or $9, $10, $9 -; MMR6-NEXT: andi16 $17, $3, 32 -; MMR6-NEXT: seleqz $9, $9, $17 -; MMR6-NEXT: srlv $10, $6, $3 -; MMR6-NEXT: selnez $11, $10, $17 -; MMR6-NEXT: seleqz $10, $10, $17 -; MMR6-NEXT: or $10, $10, $7 -; MMR6-NEXT: seleqz $12, $8, $2 -; MMR6-NEXT: or $8, $11, $9 -; MMR6-NEXT: addiu $2, $3, -64 -; MMR6-NEXT: srlv $9, $5, $2 -; MMR6-NEXT: sll16 $7, $4, 1 -; MMR6-NEXT: not16 $16, $2 -; MMR6-NEXT: sllv $11, $7, $16 -; MMR6-NEXT: sltiu $13, $3, 64 -; MMR6-NEXT: or $8, $8, $12 -; MMR6-NEXT: selnez $10, $10, $13 -; MMR6-NEXT: or $9, $11, $9 -; MMR6-NEXT: srav $11, $4, $2 +; MMR6-NEXT: move $4, $17 +; MMR6-NEXT: sw $17, 8($sp) # 4-byte Folded Spill +; MMR6-NEXT: srl16 $17, $17, 1 +; MMR6-NEXT: srlv $9, $17, $16 +; MMR6-NEXT: or $8, $8, $9 +; MMR6-NEXT: srav $9, $5, $3 +; MMR6-NEXT: sllv $10, $4, $3 ; MMR6-NEXT: andi16 $2, $2, 32 -; MMR6-NEXT: seleqz $12, $11, $2 -; MMR6-NEXT: sra $14, $4, 31 -; MMR6-NEXT: selnez $15, $14, $2 +; MMR6-NEXT: selnez $11, $10, $2 +; MMR6-NEXT: seleqz $8, $8, $2 +; MMR6-NEXT: selnez $12, $9, $2 +; MMR6-NEXT: srlv $13, $4, $3 +; MMR6-NEXT: sll16 $17, $5, 1 +; MMR6-NEXT: sw $17, 12($sp) # 4-byte Folded Spill +; MMR6-NEXT: sllv $14, $17, $16 +; MMR6-NEXT: or 
$13, $14, $13 +; MMR6-NEXT: seleqz $13, $13, $2 +; MMR6-NEXT: or $12, $12, $13 +; MMR6-NEXT: srlv $13, $6, $3 +; MMR6-NEXT: addiu $4, $7, -64 +; MMR6-NEXT: sw $4, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sltiu $14, $7, 64 +; MMR6-NEXT: sra $15, $5, 31 +; MMR6-NEXT: or $8, $11, $8 +; MMR6-NEXT: seleqz $11, $13, $2 ; MMR6-NEXT: seleqz $9, $9, $2 -; MMR6-NEXT: or $12, $15, $12 -; MMR6-NEXT: seleqz $12, $12, $13 -; MMR6-NEXT: selnez $2, $11, $2 -; MMR6-NEXT: seleqz $11, $14, $13 -; MMR6-NEXT: or $10, $10, $12 -; MMR6-NEXT: selnez $10, $10, $3 -; MMR6-NEXT: selnez $8, $8, $13 -; MMR6-NEXT: or $2, $2, $9 -; MMR6-NEXT: srav $9, $4, $3 -; MMR6-NEXT: seleqz $4, $9, $17 -; MMR6-NEXT: selnez $12, $14, $17 -; MMR6-NEXT: or $4, $12, $4 -; MMR6-NEXT: selnez $12, $4, $13 -; MMR6-NEXT: seleqz $2, $2, $13 -; MMR6-NEXT: seleqz $4, $6, $3 -; MMR6-NEXT: seleqz $1, $1, $3 -; MMR6-NEXT: or $2, $8, $2 -; MMR6-NEXT: selnez $2, $2, $3 -; MMR6-NEXT: or $1, $1, $2 -; MMR6-NEXT: or $4, $4, $10 -; MMR6-NEXT: or $2, $12, $11 -; MMR6-NEXT: srlv $3, $5, $3 -; MMR6-NEXT: lw $5, 4($sp) # 4-byte Folded Reload -; MMR6-NEXT: sllv $5, $7, $5 -; MMR6-NEXT: or $3, $5, $3 -; MMR6-NEXT: seleqz $3, $3, $17 -; MMR6-NEXT: selnez $5, $9, $17 -; MMR6-NEXT: or $3, $5, $3 -; MMR6-NEXT: selnez $3, $3, $13 -; MMR6-NEXT: or $3, $3, $11 -; MMR6-NEXT: move $5, $1 +; MMR6-NEXT: selnez $24, $15, $2 +; MMR6-NEXT: seleqz $25, $15, $14 +; MMR6-NEXT: andi16 $17, $4, 32 +; MMR6-NEXT: seleqz $gp, $6, $7 +; MMR6-NEXT: selnez $12, $12, $14 +; MMR6-NEXT: selnez $13, $13, $2 +; MMR6-NEXT: srlv $3, $1, $3 +; MMR6-NEXT: sll16 $6, $6, 1 +; MMR6-NEXT: sllv $6, $6, $16 +; MMR6-NEXT: or $3, $6, $3 +; MMR6-NEXT: seleqz $3, $3, $2 +; MMR6-NEXT: or $3, $13, $3 +; MMR6-NEXT: seleqz $2, $10, $2 +; MMR6-NEXT: or $2, $3, $2 +; MMR6-NEXT: or $3, $12, $25 +; MMR6-NEXT: or $9, $24, $9 +; MMR6-NEXT: seleqz $4, $1, $7 +; MMR6-NEXT: or $1, $11, $8 +; MMR6-NEXT: selnez $1, $1, $14 +; MMR6-NEXT: selnez $8, $15, $17 +; MMR6-NEXT: lw $6, 4($sp) # 
4-byte Folded Reload +; MMR6-NEXT: andi16 $6, $6, 63 +; MMR6-NEXT: srav $5, $5, $6 +; MMR6-NEXT: seleqz $10, $5, $17 +; MMR6-NEXT: or $8, $8, $10 +; MMR6-NEXT: seleqz $8, $8, $14 +; MMR6-NEXT: or $1, $1, $8 +; MMR6-NEXT: selnez $1, $1, $7 +; MMR6-NEXT: or $1, $gp, $1 +; MMR6-NEXT: selnez $2, $2, $14 ; MMR6-NEXT: lw $16, 8($sp) # 4-byte Folded Reload -; MMR6-NEXT: lw $17, 12($sp) # 4-byte Folded Reload -; MMR6-NEXT: addiu $sp, $sp, 16 +; MMR6-NEXT: srlv $8, $16, $6 +; MMR6-NEXT: not16 $6, $6 +; MMR6-NEXT: lw $16, 12($sp) # 4-byte Folded Reload +; MMR6-NEXT: sllv $6, $16, $6 +; MMR6-NEXT: or $6, $6, $8 +; MMR6-NEXT: seleqz $6, $6, $17 +; MMR6-NEXT: selnez $5, $5, $17 +; MMR6-NEXT: or $5, $5, $6 +; MMR6-NEXT: seleqz $5, $5, $14 +; MMR6-NEXT: or $2, $2, $5 +; MMR6-NEXT: selnez $2, $2, $7 +; MMR6-NEXT: or $5, $4, $2 +; MMR6-NEXT: selnez $2, $9, $14 +; MMR6-NEXT: or $2, $2, $25 +; MMR6-NEXT: move $4, $1 +; MMR6-NEXT: lw $16, 16($sp) # 4-byte Folded Reload +; MMR6-NEXT: lw $17, 20($sp) # 4-byte Folded Reload +; MMR6-NEXT: addiu $sp, $sp, 24 ; MMR6-NEXT: jrc $ra entry: %r = ashr i128 %a, %b diff --git a/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll b/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll --- a/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll @@ -396,111 +396,111 @@ define signext i128 @lshr_i128(i128 signext %a, i128 signext %b) { ; MIPS2-LABEL: lshr_i128: ; MIPS2: # %bb.0: # %entry -; MIPS2-NEXT: lw $2, 28($sp) +; MIPS2-NEXT: lw $8, 28($sp) ; MIPS2-NEXT: addiu $1, $zero, 64 -; MIPS2-NEXT: subu $12, $1, $2 -; MIPS2-NEXT: sllv $10, $5, $12 -; MIPS2-NEXT: andi $15, $12, 32 -; MIPS2-NEXT: andi $8, $2, 32 +; MIPS2-NEXT: subu $1, $1, $8 +; MIPS2-NEXT: sll $2, $4, 1 +; MIPS2-NEXT: addiu $3, $8, -64 +; MIPS2-NEXT: andi $12, $3, 63 +; MIPS2-NEXT: andi $9, $1, 63 +; MIPS2-NEXT: andi $10, $1, 32 +; MIPS2-NEXT: srlv $11, $4, $12 +; MIPS2-NEXT: andi $1, $3, 32 +; MIPS2-NEXT: bnez $1, $BB5_2 ; MIPS2-NEXT: addiu $3, $zero, 0 -; MIPS2-NEXT: bnez $15, $BB5_2 
-; MIPS2-NEXT: addiu $13, $zero, 0 ; MIPS2-NEXT: # %bb.1: # %entry -; MIPS2-NEXT: move $13, $10 -; MIPS2-NEXT: $BB5_2: # %entry -; MIPS2-NEXT: not $9, $2 -; MIPS2-NEXT: bnez $8, $BB5_5 -; MIPS2-NEXT: srlv $24, $6, $2 -; MIPS2-NEXT: # %bb.3: # %entry -; MIPS2-NEXT: sll $1, $6, 1 -; MIPS2-NEXT: srlv $11, $7, $2 -; MIPS2-NEXT: sllv $1, $1, $9 -; MIPS2-NEXT: or $14, $1, $11 -; MIPS2-NEXT: bnez $15, $BB5_7 -; MIPS2-NEXT: move $11, $24 +; MIPS2-NEXT: not $1, $12 +; MIPS2-NEXT: srlv $12, $5, $12 +; MIPS2-NEXT: sllv $1, $2, $1 +; MIPS2-NEXT: or $12, $1, $12 +; MIPS2-NEXT: b $BB5_3 +; MIPS2-NEXT: move $13, $11 +; MIPS2-NEXT: $BB5_2: +; MIPS2-NEXT: addiu $13, $zero, 0 +; MIPS2-NEXT: move $12, $11 +; MIPS2-NEXT: $BB5_3: # %entry +; MIPS2-NEXT: not $11, $9 +; MIPS2-NEXT: bnez $10, $BB5_5 +; MIPS2-NEXT: srlv $15, $6, $9 ; MIPS2-NEXT: # %bb.4: # %entry +; MIPS2-NEXT: sll $1, $6, 1 +; MIPS2-NEXT: srlv $14, $7, $9 +; MIPS2-NEXT: sllv $1, $1, $11 +; MIPS2-NEXT: or $25, $1, $14 ; MIPS2-NEXT: b $BB5_6 -; MIPS2-NEXT: nop +; MIPS2-NEXT: move $14, $15 ; MIPS2-NEXT: $BB5_5: -; MIPS2-NEXT: addiu $11, $zero, 0 -; MIPS2-NEXT: bnez $15, $BB5_7 -; MIPS2-NEXT: move $14, $24 +; MIPS2-NEXT: addiu $14, $zero, 0 +; MIPS2-NEXT: move $25, $15 ; MIPS2-NEXT: $BB5_6: # %entry -; MIPS2-NEXT: sllv $1, $4, $12 -; MIPS2-NEXT: not $10, $12 -; MIPS2-NEXT: srl $12, $5, 1 -; MIPS2-NEXT: srlv $10, $12, $10 -; MIPS2-NEXT: or $10, $1, $10 -; MIPS2-NEXT: $BB5_7: # %entry -; MIPS2-NEXT: addiu $15, $2, -64 -; MIPS2-NEXT: sll $12, $4, 1 -; MIPS2-NEXT: andi $1, $15, 32 -; MIPS2-NEXT: bnez $1, $BB5_10 -; MIPS2-NEXT: srlv $25, $4, $15 -; MIPS2-NEXT: # %bb.8: # %entry -; MIPS2-NEXT: srlv $1, $5, $15 -; MIPS2-NEXT: not $15, $15 -; MIPS2-NEXT: sllv $15, $12, $15 -; MIPS2-NEXT: or $24, $15, $1 -; MIPS2-NEXT: move $15, $25 -; MIPS2-NEXT: sltiu $25, $2, 64 -; MIPS2-NEXT: beqz $25, $BB5_12 -; MIPS2-NEXT: nop -; MIPS2-NEXT: # %bb.9: # %entry -; MIPS2-NEXT: b $BB5_11 +; MIPS2-NEXT: sllv $15, $5, $9 +; MIPS2-NEXT: beqz $10, 
$BB5_16 +; MIPS2-NEXT: addiu $gp, $zero, 0 +; MIPS2-NEXT: # %bb.7: # %entry +; MIPS2-NEXT: sltiu $24, $8, 64 +; MIPS2-NEXT: bnez $24, $BB5_17 ; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_10: -; MIPS2-NEXT: move $24, $25 -; MIPS2-NEXT: sltiu $25, $2, 64 -; MIPS2-NEXT: beqz $25, $BB5_12 -; MIPS2-NEXT: addiu $15, $zero, 0 -; MIPS2-NEXT: $BB5_11: -; MIPS2-NEXT: or $24, $14, $13 -; MIPS2-NEXT: $BB5_12: # %entry -; MIPS2-NEXT: sltiu $13, $2, 1 -; MIPS2-NEXT: beqz $13, $BB5_19 +; MIPS2-NEXT: $BB5_8: # %entry +; MIPS2-NEXT: beqz $10, $BB5_18 ; MIPS2-NEXT: nop -; MIPS2-NEXT: # %bb.13: # %entry -; MIPS2-NEXT: bnez $25, $BB5_20 +; MIPS2-NEXT: $BB5_9: # %entry +; MIPS2-NEXT: bnez $24, $BB5_19 +; MIPS2-NEXT: sltiu $25, $8, 1 +; MIPS2-NEXT: $BB5_10: # %entry +; MIPS2-NEXT: beqz $25, $BB5_20 ; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_14: # %entry -; MIPS2-NEXT: bnez $13, $BB5_16 -; MIPS2-NEXT: addiu $10, $zero, 63 -; MIPS2-NEXT: $BB5_15: # %entry -; MIPS2-NEXT: move $6, $15 -; MIPS2-NEXT: $BB5_16: # %entry -; MIPS2-NEXT: sltu $10, $10, $2 -; MIPS2-NEXT: bnez $8, $BB5_22 -; MIPS2-NEXT: srlv $11, $4, $2 -; MIPS2-NEXT: # %bb.17: # %entry -; MIPS2-NEXT: srlv $1, $5, $2 -; MIPS2-NEXT: sllv $2, $12, $9 +; MIPS2-NEXT: $BB5_11: # %entry +; MIPS2-NEXT: bnez $25, $BB5_13 +; MIPS2-NEXT: addiu $13, $zero, 63 +; MIPS2-NEXT: $BB5_12: # %entry +; MIPS2-NEXT: move $7, $12 +; MIPS2-NEXT: $BB5_13: # %entry +; MIPS2-NEXT: sltu $8, $13, $8 +; MIPS2-NEXT: bnez $10, $BB5_22 +; MIPS2-NEXT: srlv $12, $4, $9 +; MIPS2-NEXT: # %bb.14: # %entry +; MIPS2-NEXT: srlv $1, $5, $9 +; MIPS2-NEXT: sllv $2, $2, $11 ; MIPS2-NEXT: or $4, $2, $1 -; MIPS2-NEXT: move $5, $11 -; MIPS2-NEXT: bnez $10, $BB5_24 +; MIPS2-NEXT: move $5, $12 +; MIPS2-NEXT: bnez $8, $BB5_24 ; MIPS2-NEXT: addiu $2, $zero, 0 -; MIPS2-NEXT: # %bb.18: # %entry +; MIPS2-NEXT: # %bb.15: # %entry ; MIPS2-NEXT: b $BB5_23 ; MIPS2-NEXT: nop -; MIPS2-NEXT: $BB5_19: # %entry -; MIPS2-NEXT: beqz $25, $BB5_14 -; MIPS2-NEXT: move $7, $24 -; MIPS2-NEXT: $BB5_20: -; 
MIPS2-NEXT: or $15, $11, $10 -; MIPS2-NEXT: bnez $13, $BB5_16 -; MIPS2-NEXT: addiu $10, $zero, 63 -; MIPS2-NEXT: # %bb.21: -; MIPS2-NEXT: b $BB5_15 +; MIPS2-NEXT: $BB5_16: # %entry +; MIPS2-NEXT: sltiu $24, $8, 64 +; MIPS2-NEXT: beqz $24, $BB5_8 +; MIPS2-NEXT: move $gp, $15 +; MIPS2-NEXT: $BB5_17: +; MIPS2-NEXT: bnez $10, $BB5_9 +; MIPS2-NEXT: or $12, $25, $gp +; MIPS2-NEXT: $BB5_18: # %entry +; MIPS2-NEXT: sllv $1, $4, $9 +; MIPS2-NEXT: srl $15, $5, 1 +; MIPS2-NEXT: srlv $15, $15, $11 +; MIPS2-NEXT: or $15, $1, $15 +; MIPS2-NEXT: beqz $24, $BB5_10 +; MIPS2-NEXT: sltiu $25, $8, 1 +; MIPS2-NEXT: $BB5_19: +; MIPS2-NEXT: bnez $25, $BB5_11 +; MIPS2-NEXT: or $13, $14, $15 +; MIPS2-NEXT: $BB5_20: # %entry +; MIPS2-NEXT: move $6, $13 +; MIPS2-NEXT: bnez $25, $BB5_13 +; MIPS2-NEXT: addiu $13, $zero, 63 +; MIPS2-NEXT: # %bb.21: # %entry +; MIPS2-NEXT: b $BB5_12 ; MIPS2-NEXT: nop ; MIPS2-NEXT: $BB5_22: ; MIPS2-NEXT: addiu $5, $zero, 0 -; MIPS2-NEXT: move $4, $11 -; MIPS2-NEXT: bnez $10, $BB5_24 +; MIPS2-NEXT: move $4, $12 +; MIPS2-NEXT: bnez $8, $BB5_24 ; MIPS2-NEXT: addiu $2, $zero, 0 ; MIPS2-NEXT: $BB5_23: # %entry ; MIPS2-NEXT: move $2, $5 ; MIPS2-NEXT: $BB5_24: # %entry -; MIPS2-NEXT: bnez $10, $BB5_26 +; MIPS2-NEXT: bnez $8, $BB5_26 ; MIPS2-NEXT: nop ; MIPS2-NEXT: # %bb.25: # %entry ; MIPS2-NEXT: move $3, $4 @@ -511,184 +511,180 @@ ; ; MIPS32-LABEL: lshr_i128: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $9, 28($sp) +; MIPS32-NEXT: lw $2, 28($sp) ; MIPS32-NEXT: addiu $1, $zero, 64 -; MIPS32-NEXT: subu $2, $1, $9 -; MIPS32-NEXT: sllv $10, $5, $2 -; MIPS32-NEXT: andi $11, $2, 32 -; MIPS32-NEXT: move $1, $10 -; MIPS32-NEXT: movn $1, $zero, $11 -; MIPS32-NEXT: srlv $3, $7, $9 -; MIPS32-NEXT: not $12, $9 -; MIPS32-NEXT: sll $8, $6, 1 -; MIPS32-NEXT: sllv $8, $8, $12 -; MIPS32-NEXT: or $3, $8, $3 -; MIPS32-NEXT: srlv $13, $6, $9 -; MIPS32-NEXT: andi $14, $9, 32 -; MIPS32-NEXT: movn $3, $13, $14 -; MIPS32-NEXT: addiu $15, $9, -64 -; MIPS32-NEXT: or $3, $3, $1 -; MIPS32-NEXT: 
srlv $1, $5, $15 +; MIPS32-NEXT: subu $1, $1, $2 +; MIPS32-NEXT: andi $3, $1, 63 +; MIPS32-NEXT: sllv $9, $5, $3 +; MIPS32-NEXT: andi $10, $1, 32 +; MIPS32-NEXT: move $1, $9 +; MIPS32-NEXT: movn $1, $zero, $10 +; MIPS32-NEXT: srlv $8, $7, $3 +; MIPS32-NEXT: not $11, $3 +; MIPS32-NEXT: sll $12, $6, 1 +; MIPS32-NEXT: sllv $12, $12, $11 +; MIPS32-NEXT: or $8, $12, $8 +; MIPS32-NEXT: srlv $12, $6, $3 +; MIPS32-NEXT: movn $8, $12, $10 +; MIPS32-NEXT: addiu $13, $2, -64 +; MIPS32-NEXT: andi $14, $13, 63 +; MIPS32-NEXT: or $15, $8, $1 +; MIPS32-NEXT: srlv $1, $5, $14 ; MIPS32-NEXT: sll $24, $4, 1 -; MIPS32-NEXT: not $8, $15 +; MIPS32-NEXT: not $8, $14 ; MIPS32-NEXT: sllv $8, $24, $8 ; MIPS32-NEXT: or $1, $8, $1 -; MIPS32-NEXT: srlv $8, $4, $15 -; MIPS32-NEXT: andi $15, $15, 32 -; MIPS32-NEXT: movn $1, $8, $15 -; MIPS32-NEXT: sltiu $25, $9, 64 -; MIPS32-NEXT: movn $1, $3, $25 -; MIPS32-NEXT: sllv $3, $4, $2 -; MIPS32-NEXT: not $2, $2 -; MIPS32-NEXT: srl $gp, $5, 1 -; MIPS32-NEXT: srlv $2, $gp, $2 -; MIPS32-NEXT: or $gp, $3, $2 -; MIPS32-NEXT: srlv $2, $5, $9 -; MIPS32-NEXT: sllv $3, $24, $12 -; MIPS32-NEXT: or $3, $3, $2 -; MIPS32-NEXT: srlv $2, $4, $9 -; MIPS32-NEXT: movn $3, $2, $14 -; MIPS32-NEXT: movz $1, $7, $9 -; MIPS32-NEXT: movz $3, $zero, $25 -; MIPS32-NEXT: movn $gp, $10, $11 -; MIPS32-NEXT: movn $13, $zero, $14 -; MIPS32-NEXT: or $4, $13, $gp -; MIPS32-NEXT: movn $8, $zero, $15 -; MIPS32-NEXT: movn $8, $4, $25 -; MIPS32-NEXT: movz $8, $6, $9 -; MIPS32-NEXT: movn $2, $zero, $14 -; MIPS32-NEXT: movz $2, $zero, $25 +; MIPS32-NEXT: srlv $8, $4, $14 +; MIPS32-NEXT: andi $13, $13, 32 +; MIPS32-NEXT: movn $1, $8, $13 +; MIPS32-NEXT: sltiu $14, $2, 64 +; MIPS32-NEXT: movn $1, $15, $14 +; MIPS32-NEXT: movn $12, $zero, $10 +; MIPS32-NEXT: sllv $15, $4, $3 +; MIPS32-NEXT: srl $25, $5, 1 +; MIPS32-NEXT: srlv $25, $25, $11 +; MIPS32-NEXT: or $15, $15, $25 +; MIPS32-NEXT: movn $15, $9, $10 +; MIPS32-NEXT: or $9, $12, $15 +; MIPS32-NEXT: movn $8, $zero, $13 +; MIPS32-NEXT: 
movn $8, $9, $14 +; MIPS32-NEXT: srlv $4, $4, $3 +; MIPS32-NEXT: movz $8, $6, $2 +; MIPS32-NEXT: movz $1, $7, $2 +; MIPS32-NEXT: move $2, $4 +; MIPS32-NEXT: movn $2, $zero, $10 +; MIPS32-NEXT: movz $2, $zero, $14 +; MIPS32-NEXT: srlv $3, $5, $3 +; MIPS32-NEXT: sllv $5, $24, $11 +; MIPS32-NEXT: or $3, $5, $3 +; MIPS32-NEXT: movn $3, $4, $10 +; MIPS32-NEXT: movz $3, $zero, $14 ; MIPS32-NEXT: move $4, $8 ; MIPS32-NEXT: jr $ra ; MIPS32-NEXT: move $5, $1 ; ; MIPS32R2-LABEL: lshr_i128: ; MIPS32R2: # %bb.0: # %entry -; MIPS32R2-NEXT: lw $9, 28($sp) +; MIPS32R2-NEXT: lw $2, 28($sp) ; MIPS32R2-NEXT: addiu $1, $zero, 64 -; MIPS32R2-NEXT: subu $2, $1, $9 -; MIPS32R2-NEXT: sllv $10, $5, $2 -; MIPS32R2-NEXT: andi $11, $2, 32 -; MIPS32R2-NEXT: move $1, $10 -; MIPS32R2-NEXT: movn $1, $zero, $11 -; MIPS32R2-NEXT: srlv $3, $7, $9 -; MIPS32R2-NEXT: not $12, $9 -; MIPS32R2-NEXT: sll $8, $6, 1 -; MIPS32R2-NEXT: sllv $8, $8, $12 -; MIPS32R2-NEXT: or $3, $8, $3 -; MIPS32R2-NEXT: srlv $13, $6, $9 -; MIPS32R2-NEXT: andi $14, $9, 32 -; MIPS32R2-NEXT: movn $3, $13, $14 -; MIPS32R2-NEXT: addiu $15, $9, -64 -; MIPS32R2-NEXT: or $3, $3, $1 -; MIPS32R2-NEXT: srlv $1, $5, $15 +; MIPS32R2-NEXT: subu $1, $1, $2 +; MIPS32R2-NEXT: andi $3, $1, 63 +; MIPS32R2-NEXT: sllv $9, $5, $3 +; MIPS32R2-NEXT: andi $10, $1, 32 +; MIPS32R2-NEXT: move $1, $9 +; MIPS32R2-NEXT: movn $1, $zero, $10 +; MIPS32R2-NEXT: srlv $8, $7, $3 +; MIPS32R2-NEXT: not $11, $3 +; MIPS32R2-NEXT: sll $12, $6, 1 +; MIPS32R2-NEXT: sllv $12, $12, $11 +; MIPS32R2-NEXT: or $8, $12, $8 +; MIPS32R2-NEXT: srlv $12, $6, $3 +; MIPS32R2-NEXT: movn $8, $12, $10 +; MIPS32R2-NEXT: addiu $13, $2, -64 +; MIPS32R2-NEXT: andi $14, $13, 63 +; MIPS32R2-NEXT: or $15, $8, $1 +; MIPS32R2-NEXT: srlv $1, $5, $14 ; MIPS32R2-NEXT: sll $24, $4, 1 -; MIPS32R2-NEXT: not $8, $15 +; MIPS32R2-NEXT: not $8, $14 ; MIPS32R2-NEXT: sllv $8, $24, $8 ; MIPS32R2-NEXT: or $1, $8, $1 -; MIPS32R2-NEXT: srlv $8, $4, $15 -; MIPS32R2-NEXT: andi $15, $15, 32 -; MIPS32R2-NEXT: movn 
$1, $8, $15 -; MIPS32R2-NEXT: sltiu $25, $9, 64 -; MIPS32R2-NEXT: movn $1, $3, $25 -; MIPS32R2-NEXT: sllv $3, $4, $2 -; MIPS32R2-NEXT: not $2, $2 -; MIPS32R2-NEXT: srl $gp, $5, 1 -; MIPS32R2-NEXT: srlv $2, $gp, $2 -; MIPS32R2-NEXT: or $gp, $3, $2 -; MIPS32R2-NEXT: srlv $2, $5, $9 -; MIPS32R2-NEXT: sllv $3, $24, $12 -; MIPS32R2-NEXT: or $3, $3, $2 -; MIPS32R2-NEXT: srlv $2, $4, $9 -; MIPS32R2-NEXT: movn $3, $2, $14 -; MIPS32R2-NEXT: movz $1, $7, $9 -; MIPS32R2-NEXT: movz $3, $zero, $25 -; MIPS32R2-NEXT: movn $gp, $10, $11 -; MIPS32R2-NEXT: movn $13, $zero, $14 -; MIPS32R2-NEXT: or $4, $13, $gp -; MIPS32R2-NEXT: movn $8, $zero, $15 -; MIPS32R2-NEXT: movn $8, $4, $25 -; MIPS32R2-NEXT: movz $8, $6, $9 -; MIPS32R2-NEXT: movn $2, $zero, $14 -; MIPS32R2-NEXT: movz $2, $zero, $25 +; MIPS32R2-NEXT: srlv $8, $4, $14 +; MIPS32R2-NEXT: andi $13, $13, 32 +; MIPS32R2-NEXT: movn $1, $8, $13 +; MIPS32R2-NEXT: sltiu $14, $2, 64 +; MIPS32R2-NEXT: movn $1, $15, $14 +; MIPS32R2-NEXT: movn $12, $zero, $10 +; MIPS32R2-NEXT: sllv $15, $4, $3 +; MIPS32R2-NEXT: srl $25, $5, 1 +; MIPS32R2-NEXT: srlv $25, $25, $11 +; MIPS32R2-NEXT: or $15, $15, $25 +; MIPS32R2-NEXT: movn $15, $9, $10 +; MIPS32R2-NEXT: or $9, $12, $15 +; MIPS32R2-NEXT: movn $8, $zero, $13 +; MIPS32R2-NEXT: movn $8, $9, $14 +; MIPS32R2-NEXT: srlv $4, $4, $3 +; MIPS32R2-NEXT: movz $8, $6, $2 +; MIPS32R2-NEXT: movz $1, $7, $2 +; MIPS32R2-NEXT: move $2, $4 +; MIPS32R2-NEXT: movn $2, $zero, $10 +; MIPS32R2-NEXT: movz $2, $zero, $14 +; MIPS32R2-NEXT: srlv $3, $5, $3 +; MIPS32R2-NEXT: sllv $5, $24, $11 +; MIPS32R2-NEXT: or $3, $5, $3 +; MIPS32R2-NEXT: movn $3, $4, $10 +; MIPS32R2-NEXT: movz $3, $zero, $14 ; MIPS32R2-NEXT: move $4, $8 ; MIPS32R2-NEXT: jr $ra ; MIPS32R2-NEXT: move $5, $1 ; ; MIPS32R6-LABEL: lshr_i128: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: addiu $sp, $sp, -8 -; MIPS32R6-NEXT: .cfi_def_cfa_offset 8 -; MIPS32R6-NEXT: sw $16, 4($sp) # 4-byte Folded Spill -; MIPS32R6-NEXT: .cfi_offset 16, -4 -; MIPS32R6-NEXT: lw 
$1, 36($sp) -; MIPS32R6-NEXT: srlv $2, $7, $1 -; MIPS32R6-NEXT: not $3, $1 -; MIPS32R6-NEXT: sll $8, $6, 1 -; MIPS32R6-NEXT: sllv $8, $8, $3 -; MIPS32R6-NEXT: or $2, $8, $2 -; MIPS32R6-NEXT: addiu $8, $1, -64 -; MIPS32R6-NEXT: srlv $9, $5, $8 -; MIPS32R6-NEXT: sll $10, $4, 1 -; MIPS32R6-NEXT: not $11, $8 -; MIPS32R6-NEXT: sllv $11, $10, $11 -; MIPS32R6-NEXT: andi $12, $1, 32 -; MIPS32R6-NEXT: seleqz $2, $2, $12 +; MIPS32R6-NEXT: lw $1, 28($sp) +; MIPS32R6-NEXT: addiu $2, $zero, 64 +; MIPS32R6-NEXT: subu $2, $2, $1 +; MIPS32R6-NEXT: andi $3, $2, 63 +; MIPS32R6-NEXT: srlv $8, $4, $3 +; MIPS32R6-NEXT: andi $2, $2, 32 +; MIPS32R6-NEXT: srlv $9, $7, $3 +; MIPS32R6-NEXT: not $10, $3 +; MIPS32R6-NEXT: sll $11, $6, 1 +; MIPS32R6-NEXT: sllv $11, $11, $10 ; MIPS32R6-NEXT: or $9, $11, $9 -; MIPS32R6-NEXT: srlv $11, $6, $1 -; MIPS32R6-NEXT: selnez $13, $11, $12 -; MIPS32R6-NEXT: addiu $14, $zero, 64 -; MIPS32R6-NEXT: subu $14, $14, $1 -; MIPS32R6-NEXT: sllv $15, $5, $14 -; MIPS32R6-NEXT: andi $24, $14, 32 -; MIPS32R6-NEXT: andi $25, $8, 32 -; MIPS32R6-NEXT: seleqz $9, $9, $25 -; MIPS32R6-NEXT: seleqz $gp, $15, $24 -; MIPS32R6-NEXT: or $2, $13, $2 -; MIPS32R6-NEXT: selnez $13, $15, $24 -; MIPS32R6-NEXT: sllv $15, $4, $14 -; MIPS32R6-NEXT: not $14, $14 -; MIPS32R6-NEXT: srl $16, $5, 1 -; MIPS32R6-NEXT: srlv $14, $16, $14 -; MIPS32R6-NEXT: or $14, $15, $14 -; MIPS32R6-NEXT: seleqz $14, $14, $24 -; MIPS32R6-NEXT: srlv $8, $4, $8 -; MIPS32R6-NEXT: or $13, $13, $14 -; MIPS32R6-NEXT: or $2, $2, $gp -; MIPS32R6-NEXT: srlv $5, $5, $1 -; MIPS32R6-NEXT: selnez $14, $8, $25 -; MIPS32R6-NEXT: sltiu $15, $1, 64 -; MIPS32R6-NEXT: selnez $2, $2, $15 -; MIPS32R6-NEXT: or $9, $14, $9 -; MIPS32R6-NEXT: sllv $3, $10, $3 -; MIPS32R6-NEXT: seleqz $10, $11, $12 -; MIPS32R6-NEXT: or $10, $10, $13 -; MIPS32R6-NEXT: or $3, $3, $5 -; MIPS32R6-NEXT: seleqz $5, $9, $15 -; MIPS32R6-NEXT: seleqz $9, $zero, $15 -; MIPS32R6-NEXT: srlv $4, $4, $1 -; MIPS32R6-NEXT: seleqz $11, $4, $12 -; MIPS32R6-NEXT: selnez 
$11, $11, $15 +; MIPS32R6-NEXT: selnez $11, $8, $2 +; MIPS32R6-NEXT: srlv $12, $5, $3 +; MIPS32R6-NEXT: sll $13, $4, 1 +; MIPS32R6-NEXT: sllv $14, $13, $10 +; MIPS32R6-NEXT: or $12, $14, $12 +; MIPS32R6-NEXT: seleqz $12, $12, $2 +; MIPS32R6-NEXT: sllv $14, $5, $3 +; MIPS32R6-NEXT: srlv $15, $6, $3 +; MIPS32R6-NEXT: or $11, $11, $12 +; MIPS32R6-NEXT: selnez $12, $15, $2 +; MIPS32R6-NEXT: seleqz $9, $9, $2 +; MIPS32R6-NEXT: selnez $24, $14, $2 +; MIPS32R6-NEXT: sllv $3, $4, $3 +; MIPS32R6-NEXT: srl $25, $5, 1 +; MIPS32R6-NEXT: srlv $10, $25, $10 +; MIPS32R6-NEXT: or $3, $3, $10 +; MIPS32R6-NEXT: seleqz $3, $3, $2 +; MIPS32R6-NEXT: sltiu $10, $1, 64 +; MIPS32R6-NEXT: or $24, $24, $3 +; MIPS32R6-NEXT: selnez $3, $11, $10 +; MIPS32R6-NEXT: or $9, $12, $9 +; MIPS32R6-NEXT: seleqz $11, $15, $2 +; MIPS32R6-NEXT: seleqz $8, $8, $2 +; MIPS32R6-NEXT: seleqz $2, $14, $2 +; MIPS32R6-NEXT: seleqz $12, $zero, $10 +; MIPS32R6-NEXT: addiu $14, $1, -64 +; MIPS32R6-NEXT: seleqz $6, $6, $1 +; MIPS32R6-NEXT: andi $15, $14, 63 +; MIPS32R6-NEXT: or $2, $9, $2 +; MIPS32R6-NEXT: or $3, $12, $3 +; MIPS32R6-NEXT: selnez $8, $8, $10 ; MIPS32R6-NEXT: seleqz $7, $7, $1 +; MIPS32R6-NEXT: or $9, $11, $24 +; MIPS32R6-NEXT: selnez $9, $9, $10 +; MIPS32R6-NEXT: srlv $11, $4, $15 +; MIPS32R6-NEXT: andi $14, $14, 32 +; MIPS32R6-NEXT: seleqz $4, $11, $14 +; MIPS32R6-NEXT: seleqz $4, $4, $10 +; MIPS32R6-NEXT: or $4, $9, $4 +; MIPS32R6-NEXT: selnez $4, $4, $1 +; MIPS32R6-NEXT: or $4, $6, $4 +; MIPS32R6-NEXT: selnez $2, $2, $10 +; MIPS32R6-NEXT: srlv $5, $5, $15 +; MIPS32R6-NEXT: not $6, $15 +; MIPS32R6-NEXT: sllv $6, $13, $6 +; MIPS32R6-NEXT: or $5, $6, $5 +; MIPS32R6-NEXT: seleqz $5, $5, $14 +; MIPS32R6-NEXT: selnez $6, $11, $14 +; MIPS32R6-NEXT: or $5, $6, $5 +; MIPS32R6-NEXT: seleqz $5, $5, $10 ; MIPS32R6-NEXT: or $2, $2, $5 -; MIPS32R6-NEXT: selnez $2, $2, $1 -; MIPS32R6-NEXT: or $5, $7, $2 -; MIPS32R6-NEXT: or $2, $9, $11 -; MIPS32R6-NEXT: seleqz $3, $3, $12 -; MIPS32R6-NEXT: selnez $7, $4, $12 -; 
MIPS32R6-NEXT: seleqz $4, $6, $1 -; MIPS32R6-NEXT: selnez $6, $10, $15 -; MIPS32R6-NEXT: seleqz $8, $8, $25 -; MIPS32R6-NEXT: seleqz $8, $8, $15 -; MIPS32R6-NEXT: or $6, $6, $8 -; MIPS32R6-NEXT: selnez $1, $6, $1 -; MIPS32R6-NEXT: or $4, $4, $1 -; MIPS32R6-NEXT: or $1, $7, $3 -; MIPS32R6-NEXT: selnez $1, $1, $15 -; MIPS32R6-NEXT: or $3, $9, $1 -; MIPS32R6-NEXT: lw $16, 4($sp) # 4-byte Folded Reload +; MIPS32R6-NEXT: selnez $1, $2, $1 +; MIPS32R6-NEXT: or $5, $7, $1 ; MIPS32R6-NEXT: jr $ra -; MIPS32R6-NEXT: addiu $sp, $sp, 8 +; MIPS32R6-NEXT: or $2, $12, $8 ; ; MIPS3-LABEL: lshr_i128: ; MIPS3: # %bb.0: # %entry @@ -770,183 +766,172 @@ ; ; MMR3-LABEL: lshr_i128: ; MMR3: # %bb.0: # %entry -; MMR3-NEXT: addiusp -40 -; MMR3-NEXT: .cfi_def_cfa_offset 40 -; MMR3-NEXT: swp $16, 32($sp) +; MMR3-NEXT: addiusp -48 +; MMR3-NEXT: .cfi_def_cfa_offset 48 +; MMR3-NEXT: swp $16, 40($sp) ; MMR3-NEXT: .cfi_offset 17, -4 ; MMR3-NEXT: .cfi_offset 16, -8 ; MMR3-NEXT: move $8, $7 -; MMR3-NEXT: sw $6, 24($sp) # 4-byte Folded Spill -; MMR3-NEXT: sw $4, 28($sp) # 4-byte Folded Spill -; MMR3-NEXT: lw $16, 68($sp) +; MMR3-NEXT: move $17, $6 +; MMR3-NEXT: sw $6, 20($sp) # 4-byte Folded Spill +; MMR3-NEXT: sw $4, 32($sp) # 4-byte Folded Spill +; MMR3-NEXT: lw $3, 76($sp) +; MMR3-NEXT: sw $3, 24($sp) # 4-byte Folded Spill ; MMR3-NEXT: li16 $2, 64 -; MMR3-NEXT: subu16 $7, $2, $16 -; MMR3-NEXT: sllv $9, $5, $7 -; MMR3-NEXT: move $17, $5 -; MMR3-NEXT: sw $5, 0($sp) # 4-byte Folded Spill -; MMR3-NEXT: andi16 $3, $7, 32 -; MMR3-NEXT: sw $3, 20($sp) # 4-byte Folded Spill +; MMR3-NEXT: subu16 $2, $2, $3 +; MMR3-NEXT: andi16 $6, $2, 63 +; MMR3-NEXT: sllv $9, $5, $6 +; MMR3-NEXT: sw $5, 28($sp) # 4-byte Folded Spill +; MMR3-NEXT: andi16 $3, $2, 32 ; MMR3-NEXT: li16 $2, 0 -; MMR3-NEXT: move $4, $9 -; MMR3-NEXT: movn $4, $2, $3 -; MMR3-NEXT: srlv $5, $8, $16 -; MMR3-NEXT: not16 $3, $16 -; MMR3-NEXT: sw $3, 16($sp) # 4-byte Folded Spill -; MMR3-NEXT: sll16 $2, $6, 1 -; MMR3-NEXT: sllv $2, $2, $3 -; 
MMR3-NEXT: or16 $2, $5 -; MMR3-NEXT: srlv $5, $6, $16 -; MMR3-NEXT: sw $5, 4($sp) # 4-byte Folded Spill -; MMR3-NEXT: andi16 $3, $16, 32 +; MMR3-NEXT: move $16, $9 +; MMR3-NEXT: movn $16, $2, $3 ; MMR3-NEXT: sw $3, 12($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $2, $5, $3 -; MMR3-NEXT: addiu $3, $16, -64 -; MMR3-NEXT: or16 $2, $4 -; MMR3-NEXT: srlv $4, $17, $3 -; MMR3-NEXT: sw $4, 8($sp) # 4-byte Folded Spill -; MMR3-NEXT: lw $4, 28($sp) # 4-byte Folded Reload -; MMR3-NEXT: sll16 $6, $4, 1 -; MMR3-NEXT: not16 $5, $3 -; MMR3-NEXT: sllv $5, $6, $5 -; MMR3-NEXT: lw $17, 8($sp) # 4-byte Folded Reload -; MMR3-NEXT: or16 $5, $17 -; MMR3-NEXT: srlv $1, $4, $3 -; MMR3-NEXT: andi16 $3, $3, 32 +; MMR3-NEXT: srlv $7, $7, $6 +; MMR3-NEXT: not16 $4, $6 +; MMR3-NEXT: sw $4, 36($sp) # 4-byte Folded Spill +; MMR3-NEXT: sll16 $2, $17, 1 +; MMR3-NEXT: sllv $2, $2, $4 +; MMR3-NEXT: or16 $2, $7 +; MMR3-NEXT: srlv $7, $17, $6 +; MMR3-NEXT: movn $2, $7, $3 +; MMR3-NEXT: lw $17, 24($sp) # 4-byte Folded Reload +; MMR3-NEXT: addiu $3, $17, -64 ; MMR3-NEXT: sw $3, 8($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $5, $1, $3 -; MMR3-NEXT: sltiu $10, $16, 64 -; MMR3-NEXT: movn $5, $2, $10 -; MMR3-NEXT: sllv $2, $4, $7 -; MMR3-NEXT: not16 $3, $7 -; MMR3-NEXT: lw $7, 0($sp) # 4-byte Folded Reload -; MMR3-NEXT: srl16 $4, $7, 1 -; MMR3-NEXT: srlv $4, $4, $3 -; MMR3-NEXT: or16 $4, $2 -; MMR3-NEXT: srlv $2, $7, $16 -; MMR3-NEXT: lw $3, 16($sp) # 4-byte Folded Reload -; MMR3-NEXT: sllv $3, $6, $3 -; MMR3-NEXT: or16 $3, $2 -; MMR3-NEXT: lw $2, 28($sp) # 4-byte Folded Reload -; MMR3-NEXT: srlv $2, $2, $16 -; MMR3-NEXT: lw $17, 12($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $3, $2, $17 -; MMR3-NEXT: movz $5, $8, $16 -; MMR3-NEXT: li16 $6, 0 -; MMR3-NEXT: movz $3, $6, $10 -; MMR3-NEXT: lw $7, 20($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $4, $9, $7 -; MMR3-NEXT: lw $6, 4($sp) # 4-byte Folded Reload -; MMR3-NEXT: li16 $7, 0 -; MMR3-NEXT: movn $6, $7, $17 -; MMR3-NEXT: or16 $6, $4 +; MMR3-NEXT: 
andi16 $4, $3, 63 +; MMR3-NEXT: or16 $2, $16 +; MMR3-NEXT: srlv $3, $5, $4 +; MMR3-NEXT: sw $3, 4($sp) # 4-byte Folded Spill +; MMR3-NEXT: lw $3, 32($sp) # 4-byte Folded Reload +; MMR3-NEXT: sll16 $5, $3, 1 +; MMR3-NEXT: sw $5, 16($sp) # 4-byte Folded Spill +; MMR3-NEXT: not16 $16, $4 +; MMR3-NEXT: sllv $16, $5, $16 +; MMR3-NEXT: lw $5, 4($sp) # 4-byte Folded Reload +; MMR3-NEXT: or16 $16, $5 +; MMR3-NEXT: srlv $1, $3, $4 ; MMR3-NEXT: lw $4, 8($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $1, $7, $4 -; MMR3-NEXT: li16 $7, 0 -; MMR3-NEXT: movn $1, $6, $10 -; MMR3-NEXT: lw $4, 24($sp) # 4-byte Folded Reload -; MMR3-NEXT: movz $1, $4, $16 -; MMR3-NEXT: movn $2, $7, $17 -; MMR3-NEXT: li16 $4, 0 -; MMR3-NEXT: movz $2, $4, $10 +; MMR3-NEXT: andi16 $5, $4, 32 +; MMR3-NEXT: movn $16, $1, $5 +; MMR3-NEXT: sltiu $10, $17, 64 +; MMR3-NEXT: movn $16, $2, $10 +; MMR3-NEXT: lw $17, 12($sp) # 4-byte Folded Reload +; MMR3-NEXT: li16 $2, 0 +; MMR3-NEXT: movn $7, $2, $17 +; MMR3-NEXT: sllv $2, $3, $6 +; MMR3-NEXT: lw $3, 28($sp) # 4-byte Folded Reload +; MMR3-NEXT: srl16 $3, $3, 1 +; MMR3-NEXT: lw $4, 36($sp) # 4-byte Folded Reload +; MMR3-NEXT: srlv $3, $3, $4 +; MMR3-NEXT: or16 $3, $2 +; MMR3-NEXT: movn $3, $9, $17 +; MMR3-NEXT: or16 $3, $7 +; MMR3-NEXT: li16 $2, 0 +; MMR3-NEXT: movn $1, $2, $5 +; MMR3-NEXT: movn $1, $3, $10 +; MMR3-NEXT: lw $2, 32($sp) # 4-byte Folded Reload +; MMR3-NEXT: srlv $9, $2, $6 +; MMR3-NEXT: lw $2, 24($sp) # 4-byte Folded Reload +; MMR3-NEXT: lw $3, 20($sp) # 4-byte Folded Reload +; MMR3-NEXT: movz $1, $3, $2 +; MMR3-NEXT: movz $16, $8, $2 +; MMR3-NEXT: move $2, $9 +; MMR3-NEXT: li16 $3, 0 +; MMR3-NEXT: movn $2, $3, $17 +; MMR3-NEXT: movz $2, $3, $10 +; MMR3-NEXT: li16 $5, 0 +; MMR3-NEXT: lw $3, 28($sp) # 4-byte Folded Reload +; MMR3-NEXT: srlv $4, $3, $6 +; MMR3-NEXT: lw $3, 36($sp) # 4-byte Folded Reload +; MMR3-NEXT: lw $6, 16($sp) # 4-byte Folded Reload +; MMR3-NEXT: sllv $3, $6, $3 +; MMR3-NEXT: or16 $3, $4 +; MMR3-NEXT: movn $3, $9, $17 +; 
MMR3-NEXT: movz $3, $5, $10 ; MMR3-NEXT: move $4, $1 -; MMR3-NEXT: lwp $16, 32($sp) -; MMR3-NEXT: addiusp 40 +; MMR3-NEXT: move $5, $16 +; MMR3-NEXT: lwp $16, 40($sp) +; MMR3-NEXT: addiusp 48 ; MMR3-NEXT: jrc $ra ; ; MMR6-LABEL: lshr_i128: ; MMR6: # %bb.0: # %entry -; MMR6-NEXT: addiu $sp, $sp, -32 -; MMR6-NEXT: .cfi_def_cfa_offset 32 -; MMR6-NEXT: sw $17, 28($sp) # 4-byte Folded Spill -; MMR6-NEXT: sw $16, 24($sp) # 4-byte Folded Spill +; MMR6-NEXT: addiu $sp, $sp, -8 +; MMR6-NEXT: .cfi_def_cfa_offset 8 +; MMR6-NEXT: sw $17, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: sw $16, 0($sp) # 4-byte Folded Spill ; MMR6-NEXT: .cfi_offset 17, -4 ; MMR6-NEXT: .cfi_offset 16, -8 ; MMR6-NEXT: move $1, $7 -; MMR6-NEXT: move $7, $5 -; MMR6-NEXT: lw $3, 60($sp) -; MMR6-NEXT: srlv $2, $1, $3 -; MMR6-NEXT: not16 $5, $3 -; MMR6-NEXT: sw $5, 12($sp) # 4-byte Folded Spill -; MMR6-NEXT: move $17, $6 -; MMR6-NEXT: sw $6, 16($sp) # 4-byte Folded Spill -; MMR6-NEXT: sll16 $6, $6, 1 -; MMR6-NEXT: sllv $6, $6, $5 -; MMR6-NEXT: or $8, $6, $2 -; MMR6-NEXT: addiu $5, $3, -64 -; MMR6-NEXT: srlv $9, $7, $5 -; MMR6-NEXT: move $6, $4 -; MMR6-NEXT: sll16 $2, $4, 1 -; MMR6-NEXT: sw $2, 8($sp) # 4-byte Folded Spill -; MMR6-NEXT: not16 $16, $5 -; MMR6-NEXT: sllv $10, $2, $16 -; MMR6-NEXT: andi16 $16, $3, 32 -; MMR6-NEXT: seleqz $8, $8, $16 +; MMR6-NEXT: lw $7, 36($sp) +; MMR6-NEXT: li16 $2, 64 +; MMR6-NEXT: subu16 $2, $2, $7 +; MMR6-NEXT: andi16 $17, $2, 63 +; MMR6-NEXT: srlv $8, $4, $17 +; MMR6-NEXT: andi16 $3, $2, 32 +; MMR6-NEXT: srlv $9, $1, $17 +; MMR6-NEXT: not16 $2, $17 +; MMR6-NEXT: sll16 $16, $6, 1 +; MMR6-NEXT: sllv $10, $16, $2 ; MMR6-NEXT: or $9, $10, $9 -; MMR6-NEXT: srlv $10, $17, $3 -; MMR6-NEXT: selnez $11, $10, $16 -; MMR6-NEXT: li16 $17, 64 -; MMR6-NEXT: subu16 $2, $17, $3 -; MMR6-NEXT: sllv $12, $7, $2 -; MMR6-NEXT: move $17, $7 -; MMR6-NEXT: andi16 $4, $2, 32 -; MMR6-NEXT: andi16 $7, $5, 32 -; MMR6-NEXT: sw $7, 20($sp) # 4-byte Folded Spill -; MMR6-NEXT: seleqz $9, $9, $7 -; 
MMR6-NEXT: seleqz $13, $12, $4 -; MMR6-NEXT: or $8, $11, $8 -; MMR6-NEXT: selnez $11, $12, $4 -; MMR6-NEXT: sllv $12, $6, $2 -; MMR6-NEXT: move $7, $6 -; MMR6-NEXT: sw $6, 4($sp) # 4-byte Folded Spill -; MMR6-NEXT: not16 $2, $2 -; MMR6-NEXT: srl16 $6, $17, 1 -; MMR6-NEXT: srlv $2, $6, $2 -; MMR6-NEXT: or $2, $12, $2 -; MMR6-NEXT: seleqz $2, $2, $4 -; MMR6-NEXT: srlv $4, $7, $5 -; MMR6-NEXT: or $11, $11, $2 -; MMR6-NEXT: or $5, $8, $13 -; MMR6-NEXT: srlv $6, $17, $3 -; MMR6-NEXT: lw $2, 20($sp) # 4-byte Folded Reload -; MMR6-NEXT: selnez $7, $4, $2 -; MMR6-NEXT: sltiu $8, $3, 64 -; MMR6-NEXT: selnez $12, $5, $8 -; MMR6-NEXT: or $7, $7, $9 -; MMR6-NEXT: lw $5, 12($sp) # 4-byte Folded Reload -; MMR6-NEXT: lw $2, 8($sp) # 4-byte Folded Reload -; MMR6-NEXT: sllv $9, $2, $5 -; MMR6-NEXT: seleqz $10, $10, $16 -; MMR6-NEXT: li16 $5, 0 +; MMR6-NEXT: selnez $10, $8, $3 +; MMR6-NEXT: srlv $11, $5, $17 +; MMR6-NEXT: sll16 $16, $4, 1 +; MMR6-NEXT: sllv $12, $16, $2 +; MMR6-NEXT: or $11, $12, $11 +; MMR6-NEXT: seleqz $11, $11, $3 +; MMR6-NEXT: sllv $12, $5, $17 +; MMR6-NEXT: srlv $13, $6, $17 ; MMR6-NEXT: or $10, $10, $11 -; MMR6-NEXT: or $6, $9, $6 -; MMR6-NEXT: seleqz $2, $7, $8 -; MMR6-NEXT: seleqz $7, $5, $8 -; MMR6-NEXT: lw $5, 4($sp) # 4-byte Folded Reload -; MMR6-NEXT: srlv $9, $5, $3 -; MMR6-NEXT: seleqz $11, $9, $16 -; MMR6-NEXT: selnez $11, $11, $8 -; MMR6-NEXT: seleqz $1, $1, $3 -; MMR6-NEXT: or $2, $12, $2 -; MMR6-NEXT: selnez $2, $2, $3 -; MMR6-NEXT: or $5, $1, $2 -; MMR6-NEXT: or $2, $7, $11 -; MMR6-NEXT: seleqz $1, $6, $16 -; MMR6-NEXT: selnez $6, $9, $16 -; MMR6-NEXT: lw $16, 16($sp) # 4-byte Folded Reload -; MMR6-NEXT: seleqz $9, $16, $3 -; MMR6-NEXT: selnez $10, $10, $8 -; MMR6-NEXT: lw $16, 20($sp) # 4-byte Folded Reload -; MMR6-NEXT: seleqz $4, $4, $16 -; MMR6-NEXT: seleqz $4, $4, $8 +; MMR6-NEXT: selnez $11, $13, $3 +; MMR6-NEXT: seleqz $9, $9, $3 +; MMR6-NEXT: selnez $14, $12, $3 +; MMR6-NEXT: sllv $15, $4, $17 +; MMR6-NEXT: srl16 $17, $5, 1 +; MMR6-NEXT: 
srlv $2, $17, $2 +; MMR6-NEXT: or $2, $15, $2 +; MMR6-NEXT: seleqz $2, $2, $3 +; MMR6-NEXT: sltiu $15, $7, 64 +; MMR6-NEXT: or $14, $14, $2 +; MMR6-NEXT: selnez $10, $10, $15 +; MMR6-NEXT: or $9, $11, $9 +; MMR6-NEXT: seleqz $11, $13, $3 +; MMR6-NEXT: seleqz $8, $8, $3 +; MMR6-NEXT: seleqz $3, $12, $3 +; MMR6-NEXT: li16 $17, 0 +; MMR6-NEXT: seleqz $12, $17, $15 +; MMR6-NEXT: addiu $17, $7, -64 +; MMR6-NEXT: seleqz $6, $6, $7 +; MMR6-NEXT: andi16 $2, $17, 63 +; MMR6-NEXT: or $9, $9, $3 +; MMR6-NEXT: or $3, $12, $10 +; MMR6-NEXT: selnez $8, $8, $15 +; MMR6-NEXT: seleqz $1, $1, $7 +; MMR6-NEXT: or $10, $11, $14 +; MMR6-NEXT: selnez $10, $10, $15 +; MMR6-NEXT: srlv $11, $4, $2 +; MMR6-NEXT: andi16 $17, $17, 32 +; MMR6-NEXT: seleqz $4, $11, $17 +; MMR6-NEXT: seleqz $4, $4, $15 ; MMR6-NEXT: or $4, $10, $4 -; MMR6-NEXT: selnez $3, $4, $3 -; MMR6-NEXT: or $4, $9, $3 -; MMR6-NEXT: or $1, $6, $1 -; MMR6-NEXT: selnez $1, $1, $8 -; MMR6-NEXT: or $3, $7, $1 -; MMR6-NEXT: lw $16, 24($sp) # 4-byte Folded Reload -; MMR6-NEXT: lw $17, 28($sp) # 4-byte Folded Reload -; MMR6-NEXT: addiu $sp, $sp, 32 +; MMR6-NEXT: selnez $4, $4, $7 +; MMR6-NEXT: or $4, $6, $4 +; MMR6-NEXT: selnez $6, $9, $15 +; MMR6-NEXT: srlv $5, $5, $2 +; MMR6-NEXT: not16 $2, $2 +; MMR6-NEXT: sllv $2, $16, $2 +; MMR6-NEXT: or $2, $2, $5 +; MMR6-NEXT: seleqz $2, $2, $17 +; MMR6-NEXT: selnez $5, $11, $17 +; MMR6-NEXT: or $2, $5, $2 +; MMR6-NEXT: seleqz $2, $2, $15 +; MMR6-NEXT: or $2, $6, $2 +; MMR6-NEXT: selnez $2, $2, $7 +; MMR6-NEXT: or $5, $1, $2 +; MMR6-NEXT: or $2, $12, $8 +; MMR6-NEXT: lw $16, 0($sp) # 4-byte Folded Reload +; MMR6-NEXT: lw $17, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: addiu $sp, $sp, 8 ; MMR6-NEXT: jrc $ra entry: diff --git a/llvm/test/CodeGen/Mips/llvm-ir/shl.ll b/llvm/test/CodeGen/Mips/llvm-ir/shl.ll --- a/llvm/test/CodeGen/Mips/llvm-ir/shl.ll +++ b/llvm/test/CodeGen/Mips/llvm-ir/shl.ll @@ -458,305 +458,310 @@ ; MIPS2: # %bb.0: # %entry ; MIPS2-NEXT: addiu $sp, $sp, -8 ; MIPS2-NEXT: 
.cfi_def_cfa_offset 8 -; MIPS2-NEXT: sw $17, 4($sp) # 4-byte Folded Spill -; MIPS2-NEXT: sw $16, 0($sp) # 4-byte Folded Spill -; MIPS2-NEXT: .cfi_offset 17, -4 -; MIPS2-NEXT: .cfi_offset 16, -8 -; MIPS2-NEXT: lw $8, 36($sp) +; MIPS2-NEXT: sw $16, 4($sp) # 4-byte Folded Spill +; MIPS2-NEXT: .cfi_offset 16, -4 +; MIPS2-NEXT: lw $10, 36($sp) ; MIPS2-NEXT: addiu $1, $zero, 64 -; MIPS2-NEXT: subu $3, $1, $8 -; MIPS2-NEXT: srlv $9, $6, $3 -; MIPS2-NEXT: andi $1, $3, 32 -; MIPS2-NEXT: bnez $1, $BB5_2 -; MIPS2-NEXT: addiu $2, $zero, 0 +; MIPS2-NEXT: subu $1, $1, $10 +; MIPS2-NEXT: andi $2, $1, 63 +; MIPS2-NEXT: not $3, $2 +; MIPS2-NEXT: srlv $13, $6, $2 +; MIPS2-NEXT: andi $9, $1, 32 +; MIPS2-NEXT: bnez $9, $BB5_2 +; MIPS2-NEXT: addiu $8, $zero, 0 ; MIPS2-NEXT: # %bb.1: # %entry -; MIPS2-NEXT: srlv $1, $7, $3 -; MIPS2-NEXT: not $3, $3 -; MIPS2-NEXT: sll $10, $6, 1 -; MIPS2-NEXT: sllv $3, $10, $3 -; MIPS2-NEXT: or $3, $3, $1 +; MIPS2-NEXT: sll $1, $6, 1 +; MIPS2-NEXT: srlv $11, $7, $2 +; MIPS2-NEXT: sllv $1, $1, $3 +; MIPS2-NEXT: or $12, $1, $11 ; MIPS2-NEXT: b $BB5_3 -; MIPS2-NEXT: move $15, $9 +; MIPS2-NEXT: move $11, $13 ; MIPS2-NEXT: $BB5_2: -; MIPS2-NEXT: addiu $15, $zero, 0 -; MIPS2-NEXT: move $3, $9 +; MIPS2-NEXT: addiu $11, $zero, 0 +; MIPS2-NEXT: move $12, $13 ; MIPS2-NEXT: $BB5_3: # %entry -; MIPS2-NEXT: not $13, $8 -; MIPS2-NEXT: sllv $9, $5, $8 -; MIPS2-NEXT: andi $10, $8, 32 -; MIPS2-NEXT: bnez $10, $BB5_5 -; MIPS2-NEXT: move $25, $9 +; MIPS2-NEXT: sllv $13, $5, $2 +; MIPS2-NEXT: bnez $9, $BB5_5 +; MIPS2-NEXT: addiu $24, $zero, 0 ; MIPS2-NEXT: # %bb.4: # %entry -; MIPS2-NEXT: sllv $1, $4, $8 -; MIPS2-NEXT: srl $11, $5, 1 -; MIPS2-NEXT: srlv $11, $11, $13 -; MIPS2-NEXT: or $25, $1, $11 +; MIPS2-NEXT: move $24, $13 ; MIPS2-NEXT: $BB5_5: # %entry -; MIPS2-NEXT: addiu $14, $8, -64 -; MIPS2-NEXT: srl $24, $7, 1 -; MIPS2-NEXT: sllv $11, $7, $14 -; MIPS2-NEXT: andi $12, $14, 32 -; MIPS2-NEXT: bnez $12, $BB5_7 -; MIPS2-NEXT: move $gp, $11 +; MIPS2-NEXT: bnez $9, $BB5_7 
+; MIPS2-NEXT: nop ; MIPS2-NEXT: # %bb.6: # %entry -; MIPS2-NEXT: sllv $1, $6, $14 -; MIPS2-NEXT: not $14, $14 -; MIPS2-NEXT: srlv $14, $24, $14 -; MIPS2-NEXT: or $gp, $1, $14 +; MIPS2-NEXT: srl $1, $5, 1 +; MIPS2-NEXT: sllv $13, $4, $2 +; MIPS2-NEXT: srlv $1, $1, $3 +; MIPS2-NEXT: or $13, $13, $1 ; MIPS2-NEXT: $BB5_7: # %entry -; MIPS2-NEXT: sltiu $14, $8, 64 -; MIPS2-NEXT: beqz $14, $BB5_9 +; MIPS2-NEXT: addiu $1, $10, -64 +; MIPS2-NEXT: andi $25, $1, 63 +; MIPS2-NEXT: sllv $15, $7, $25 +; MIPS2-NEXT: andi $16, $1, 32 +; MIPS2-NEXT: beqz $16, $BB5_20 +; MIPS2-NEXT: addiu $14, $zero, 0 +; MIPS2-NEXT: # %bb.8: # %entry +; MIPS2-NEXT: sltiu $gp, $10, 64 +; MIPS2-NEXT: bnez $gp, $BB5_21 ; MIPS2-NEXT: nop -; MIPS2-NEXT: # %bb.8: -; MIPS2-NEXT: or $gp, $25, $15 ; MIPS2-NEXT: $BB5_9: # %entry -; MIPS2-NEXT: sllv $25, $7, $8 -; MIPS2-NEXT: bnez $10, $BB5_11 -; MIPS2-NEXT: addiu $17, $zero, 0 -; MIPS2-NEXT: # %bb.10: # %entry -; MIPS2-NEXT: move $17, $25 -; MIPS2-NEXT: $BB5_11: # %entry -; MIPS2-NEXT: addiu $1, $zero, 63 -; MIPS2-NEXT: sltiu $15, $8, 1 -; MIPS2-NEXT: beqz $15, $BB5_21 -; MIPS2-NEXT: sltu $16, $1, $8 -; MIPS2-NEXT: # %bb.12: # %entry ; MIPS2-NEXT: beqz $16, $BB5_22 -; MIPS2-NEXT: addiu $7, $zero, 0 -; MIPS2-NEXT: $BB5_13: # %entry -; MIPS2-NEXT: beqz $10, $BB5_23 +; MIPS2-NEXT: srl $12, $7, 1 +; MIPS2-NEXT: $BB5_10: # %entry +; MIPS2-NEXT: bnez $gp, $BB5_23 +; MIPS2-NEXT: sltiu $24, $10, 1 +; MIPS2-NEXT: $BB5_11: # %entry +; MIPS2-NEXT: beqz $24, $BB5_24 ; MIPS2-NEXT: nop +; MIPS2-NEXT: $BB5_12: # %entry +; MIPS2-NEXT: bnez $24, $BB5_14 +; MIPS2-NEXT: addiu $11, $zero, 63 +; MIPS2-NEXT: $BB5_13: # %entry +; MIPS2-NEXT: move $5, $14 ; MIPS2-NEXT: $BB5_14: # %entry -; MIPS2-NEXT: beqz $16, $BB5_24 -; MIPS2-NEXT: addiu $6, $zero, 0 -; MIPS2-NEXT: $BB5_15: # %entry -; MIPS2-NEXT: beqz $10, $BB5_25 -; MIPS2-NEXT: addiu $8, $zero, 0 +; MIPS2-NEXT: sltu $10, $11, $10 +; MIPS2-NEXT: sllv $11, $7, $2 +; MIPS2-NEXT: beqz $9, $BB5_26 +; MIPS2-NEXT: addiu $13, $zero, 
0 +; MIPS2-NEXT: # %bb.15: # %entry +; MIPS2-NEXT: beqz $10, $BB5_27 +; MIPS2-NEXT: addiu $7, $zero, 0 ; MIPS2-NEXT: $BB5_16: # %entry -; MIPS2-NEXT: beqz $12, $BB5_26 +; MIPS2-NEXT: beqz $9, $BB5_28 ; MIPS2-NEXT: nop ; MIPS2-NEXT: $BB5_17: # %entry -; MIPS2-NEXT: bnez $14, $BB5_27 +; MIPS2-NEXT: bnez $10, $BB5_19 ; MIPS2-NEXT: nop ; MIPS2-NEXT: $BB5_18: # %entry -; MIPS2-NEXT: bnez $15, $BB5_20 -; MIPS2-NEXT: nop +; MIPS2-NEXT: move $8, $11 ; MIPS2-NEXT: $BB5_19: # %entry -; MIPS2-NEXT: move $5, $2 -; MIPS2-NEXT: $BB5_20: # %entry ; MIPS2-NEXT: move $2, $4 ; MIPS2-NEXT: move $3, $5 -; MIPS2-NEXT: move $4, $6 +; MIPS2-NEXT: move $4, $8 ; MIPS2-NEXT: move $5, $7 -; MIPS2-NEXT: lw $16, 0($sp) # 4-byte Folded Reload -; MIPS2-NEXT: lw $17, 4($sp) # 4-byte Folded Reload +; MIPS2-NEXT: lw $16, 4($sp) # 4-byte Folded Reload ; MIPS2-NEXT: jr $ra ; MIPS2-NEXT: addiu $sp, $sp, 8 -; MIPS2-NEXT: $BB5_21: # %entry -; MIPS2-NEXT: move $4, $gp -; MIPS2-NEXT: bnez $16, $BB5_13 -; MIPS2-NEXT: addiu $7, $zero, 0 +; MIPS2-NEXT: $BB5_20: # %entry +; MIPS2-NEXT: sltiu $gp, $10, 64 +; MIPS2-NEXT: beqz $gp, $BB5_9 +; MIPS2-NEXT: move $14, $15 +; MIPS2-NEXT: $BB5_21: +; MIPS2-NEXT: or $14, $24, $12 +; MIPS2-NEXT: bnez $16, $BB5_10 +; MIPS2-NEXT: srl $12, $7, 1 ; MIPS2-NEXT: $BB5_22: # %entry -; MIPS2-NEXT: bnez $10, $BB5_14 -; MIPS2-NEXT: move $7, $17 -; MIPS2-NEXT: $BB5_23: # %entry -; MIPS2-NEXT: sllv $1, $6, $8 -; MIPS2-NEXT: srlv $6, $24, $13 -; MIPS2-NEXT: or $25, $1, $6 -; MIPS2-NEXT: bnez $16, $BB5_15 -; MIPS2-NEXT: addiu $6, $zero, 0 +; MIPS2-NEXT: sllv $1, $6, $25 +; MIPS2-NEXT: not $15, $25 +; MIPS2-NEXT: srlv $15, $12, $15 +; MIPS2-NEXT: or $15, $1, $15 +; MIPS2-NEXT: beqz $gp, $BB5_11 +; MIPS2-NEXT: sltiu $24, $10, 1 +; MIPS2-NEXT: $BB5_23: +; MIPS2-NEXT: bnez $24, $BB5_12 +; MIPS2-NEXT: or $15, $13, $11 ; MIPS2-NEXT: $BB5_24: # %entry -; MIPS2-NEXT: move $6, $25 -; MIPS2-NEXT: bnez $10, $BB5_16 -; MIPS2-NEXT: addiu $8, $zero, 0 -; MIPS2-NEXT: $BB5_25: # %entry -; MIPS2-NEXT: 
bnez $12, $BB5_17 -; MIPS2-NEXT: move $8, $9 +; MIPS2-NEXT: move $4, $15 +; MIPS2-NEXT: bnez $24, $BB5_14 +; MIPS2-NEXT: addiu $11, $zero, 63 +; MIPS2-NEXT: # %bb.25: # %entry +; MIPS2-NEXT: b $BB5_13 +; MIPS2-NEXT: nop ; MIPS2-NEXT: $BB5_26: # %entry -; MIPS2-NEXT: beqz $14, $BB5_18 -; MIPS2-NEXT: move $2, $11 -; MIPS2-NEXT: $BB5_27: -; MIPS2-NEXT: bnez $15, $BB5_20 -; MIPS2-NEXT: or $2, $8, $3 -; MIPS2-NEXT: # %bb.28: -; MIPS2-NEXT: b $BB5_19 +; MIPS2-NEXT: move $13, $11 +; MIPS2-NEXT: bnez $10, $BB5_16 +; MIPS2-NEXT: addiu $7, $zero, 0 +; MIPS2-NEXT: $BB5_27: # %entry +; MIPS2-NEXT: bnez $9, $BB5_17 +; MIPS2-NEXT: move $7, $13 +; MIPS2-NEXT: $BB5_28: # %entry +; MIPS2-NEXT: sllv $1, $6, $2 +; MIPS2-NEXT: srlv $2, $12, $3 +; MIPS2-NEXT: bnez $10, $BB5_19 +; MIPS2-NEXT: or $11, $1, $2 +; MIPS2-NEXT: # %bb.29: # %entry +; MIPS2-NEXT: b $BB5_18 ; MIPS2-NEXT: nop ; ; MIPS32-LABEL: shl_i128: ; MIPS32: # %bb.0: # %entry -; MIPS32-NEXT: lw $8, 28($sp) -; MIPS32-NEXT: addiu $1, $zero, 64 -; MIPS32-NEXT: subu $1, $1, $8 -; MIPS32-NEXT: srlv $9, $6, $1 -; MIPS32-NEXT: andi $10, $1, 32 -; MIPS32-NEXT: move $2, $9 -; MIPS32-NEXT: movn $2, $zero, $10 -; MIPS32-NEXT: sllv $3, $4, $8 -; MIPS32-NEXT: not $11, $8 -; MIPS32-NEXT: srl $12, $5, 1 -; MIPS32-NEXT: srlv $12, $12, $11 -; MIPS32-NEXT: or $3, $3, $12 -; MIPS32-NEXT: sllv $12, $5, $8 -; MIPS32-NEXT: andi $13, $8, 32 -; MIPS32-NEXT: movn $3, $12, $13 -; MIPS32-NEXT: addiu $14, $8, -64 -; MIPS32-NEXT: or $15, $3, $2 -; MIPS32-NEXT: sllv $2, $6, $14 -; MIPS32-NEXT: srl $24, $7, 1 -; MIPS32-NEXT: not $3, $14 -; MIPS32-NEXT: srlv $3, $24, $3 -; MIPS32-NEXT: or $2, $2, $3 -; MIPS32-NEXT: sllv $3, $7, $14 -; MIPS32-NEXT: andi $14, $14, 32 -; MIPS32-NEXT: movn $2, $3, $14 -; MIPS32-NEXT: sltiu $25, $8, 64 -; MIPS32-NEXT: movn $2, $15, $25 -; MIPS32-NEXT: srlv $15, $7, $1 -; MIPS32-NEXT: not $1, $1 -; MIPS32-NEXT: sll $gp, $6, 1 -; MIPS32-NEXT: sllv $1, $gp, $1 -; MIPS32-NEXT: or $15, $1, $15 +; MIPS32-NEXT: lw $1, 28($sp) +; 
MIPS32-NEXT: addiu $2, $zero, 64 +; MIPS32-NEXT: subu $2, $2, $1 +; MIPS32-NEXT: andi $8, $2, 63 +; MIPS32-NEXT: sllv $3, $5, $8 +; MIPS32-NEXT: andi $9, $2, 32 +; MIPS32-NEXT: sllv $2, $4, $8 +; MIPS32-NEXT: not $10, $8 +; MIPS32-NEXT: srl $11, $5, 1 +; MIPS32-NEXT: srlv $11, $11, $10 +; MIPS32-NEXT: or $2, $2, $11 +; MIPS32-NEXT: movn $2, $3, $9 +; MIPS32-NEXT: movn $3, $zero, $9 +; MIPS32-NEXT: srlv $11, $7, $8 +; MIPS32-NEXT: sll $12, $6, 1 +; MIPS32-NEXT: sllv $12, $12, $10 +; MIPS32-NEXT: or $11, $12, $11 +; MIPS32-NEXT: srlv $12, $6, $8 +; MIPS32-NEXT: movn $11, $12, $9 +; MIPS32-NEXT: or $11, $3, $11 +; MIPS32-NEXT: movn $12, $zero, $9 +; MIPS32-NEXT: addiu $3, $1, -64 +; MIPS32-NEXT: andi $13, $3, 63 +; MIPS32-NEXT: sllv $14, $7, $13 +; MIPS32-NEXT: andi $15, $3, 32 +; MIPS32-NEXT: move $3, $14 +; MIPS32-NEXT: movn $3, $zero, $15 +; MIPS32-NEXT: sltiu $24, $1, 64 +; MIPS32-NEXT: movn $3, $11, $24 +; MIPS32-NEXT: or $11, $2, $12 +; MIPS32-NEXT: sllv $2, $6, $13 +; MIPS32-NEXT: srl $12, $7, 1 +; MIPS32-NEXT: not $13, $13 +; MIPS32-NEXT: srlv $13, $12, $13 +; MIPS32-NEXT: or $2, $2, $13 +; MIPS32-NEXT: movn $2, $14, $15 +; MIPS32-NEXT: movn $2, $11, $24 +; MIPS32-NEXT: sllv $7, $7, $8 +; MIPS32-NEXT: movz $2, $4, $1 +; MIPS32-NEXT: movz $3, $5, $1 +; MIPS32-NEXT: move $5, $7 +; MIPS32-NEXT: movn $5, $zero, $9 +; MIPS32-NEXT: movz $5, $zero, $24 ; MIPS32-NEXT: sllv $1, $6, $8 -; MIPS32-NEXT: srlv $6, $24, $11 -; MIPS32-NEXT: or $1, $1, $6 -; MIPS32-NEXT: sllv $6, $7, $8 -; MIPS32-NEXT: movn $1, $6, $13 -; MIPS32-NEXT: movz $2, $4, $8 -; MIPS32-NEXT: movz $1, $zero, $25 -; MIPS32-NEXT: movn $15, $9, $10 -; MIPS32-NEXT: movn $12, $zero, $13 -; MIPS32-NEXT: or $4, $12, $15 -; MIPS32-NEXT: movn $3, $zero, $14 -; MIPS32-NEXT: movn $3, $4, $25 -; MIPS32-NEXT: movz $3, $5, $8 -; MIPS32-NEXT: movn $6, $zero, $13 -; MIPS32-NEXT: movz $6, $zero, $25 -; MIPS32-NEXT: move $4, $1 +; MIPS32-NEXT: srlv $4, $12, $10 +; MIPS32-NEXT: or $4, $1, $4 +; MIPS32-NEXT: movn $4, $7, 
$9 ; MIPS32-NEXT: jr $ra -; MIPS32-NEXT: move $5, $6 +; MIPS32-NEXT: movz $4, $zero, $24 ; ; MIPS32R2-LABEL: shl_i128: ; MIPS32R2: # %bb.0: # %entry -; MIPS32R2-NEXT: lw $8, 28($sp) -; MIPS32R2-NEXT: addiu $1, $zero, 64 -; MIPS32R2-NEXT: subu $1, $1, $8 -; MIPS32R2-NEXT: srlv $9, $6, $1 -; MIPS32R2-NEXT: andi $10, $1, 32 -; MIPS32R2-NEXT: move $2, $9 -; MIPS32R2-NEXT: movn $2, $zero, $10 -; MIPS32R2-NEXT: sllv $3, $4, $8 -; MIPS32R2-NEXT: not $11, $8 -; MIPS32R2-NEXT: srl $12, $5, 1 -; MIPS32R2-NEXT: srlv $12, $12, $11 -; MIPS32R2-NEXT: or $3, $3, $12 -; MIPS32R2-NEXT: sllv $12, $5, $8 -; MIPS32R2-NEXT: andi $13, $8, 32 -; MIPS32R2-NEXT: movn $3, $12, $13 -; MIPS32R2-NEXT: addiu $14, $8, -64 -; MIPS32R2-NEXT: or $15, $3, $2 -; MIPS32R2-NEXT: sllv $2, $6, $14 -; MIPS32R2-NEXT: srl $24, $7, 1 -; MIPS32R2-NEXT: not $3, $14 -; MIPS32R2-NEXT: srlv $3, $24, $3 -; MIPS32R2-NEXT: or $2, $2, $3 -; MIPS32R2-NEXT: sllv $3, $7, $14 -; MIPS32R2-NEXT: andi $14, $14, 32 -; MIPS32R2-NEXT: movn $2, $3, $14 -; MIPS32R2-NEXT: sltiu $25, $8, 64 -; MIPS32R2-NEXT: movn $2, $15, $25 -; MIPS32R2-NEXT: srlv $15, $7, $1 -; MIPS32R2-NEXT: not $1, $1 -; MIPS32R2-NEXT: sll $gp, $6, 1 -; MIPS32R2-NEXT: sllv $1, $gp, $1 -; MIPS32R2-NEXT: or $15, $1, $15 +; MIPS32R2-NEXT: lw $1, 28($sp) +; MIPS32R2-NEXT: addiu $2, $zero, 64 +; MIPS32R2-NEXT: subu $2, $2, $1 +; MIPS32R2-NEXT: andi $8, $2, 63 +; MIPS32R2-NEXT: sllv $3, $5, $8 +; MIPS32R2-NEXT: andi $9, $2, 32 +; MIPS32R2-NEXT: sllv $2, $4, $8 +; MIPS32R2-NEXT: not $10, $8 +; MIPS32R2-NEXT: srl $11, $5, 1 +; MIPS32R2-NEXT: srlv $11, $11, $10 +; MIPS32R2-NEXT: or $2, $2, $11 +; MIPS32R2-NEXT: movn $2, $3, $9 +; MIPS32R2-NEXT: movn $3, $zero, $9 +; MIPS32R2-NEXT: srlv $11, $7, $8 +; MIPS32R2-NEXT: sll $12, $6, 1 +; MIPS32R2-NEXT: sllv $12, $12, $10 +; MIPS32R2-NEXT: or $11, $12, $11 +; MIPS32R2-NEXT: srlv $12, $6, $8 +; MIPS32R2-NEXT: movn $11, $12, $9 +; MIPS32R2-NEXT: or $11, $3, $11 +; MIPS32R2-NEXT: movn $12, $zero, $9 +; MIPS32R2-NEXT: addiu $3, 
$1, -64 +; MIPS32R2-NEXT: andi $13, $3, 63 +; MIPS32R2-NEXT: sllv $14, $7, $13 +; MIPS32R2-NEXT: andi $15, $3, 32 +; MIPS32R2-NEXT: move $3, $14 +; MIPS32R2-NEXT: movn $3, $zero, $15 +; MIPS32R2-NEXT: sltiu $24, $1, 64 +; MIPS32R2-NEXT: movn $3, $11, $24 +; MIPS32R2-NEXT: or $11, $2, $12 +; MIPS32R2-NEXT: sllv $2, $6, $13 +; MIPS32R2-NEXT: srl $12, $7, 1 +; MIPS32R2-NEXT: not $13, $13 +; MIPS32R2-NEXT: srlv $13, $12, $13 +; MIPS32R2-NEXT: or $2, $2, $13 +; MIPS32R2-NEXT: movn $2, $14, $15 +; MIPS32R2-NEXT: movn $2, $11, $24 +; MIPS32R2-NEXT: sllv $7, $7, $8 +; MIPS32R2-NEXT: movz $2, $4, $1 +; MIPS32R2-NEXT: movz $3, $5, $1 +; MIPS32R2-NEXT: move $5, $7 +; MIPS32R2-NEXT: movn $5, $zero, $9 +; MIPS32R2-NEXT: movz $5, $zero, $24 ; MIPS32R2-NEXT: sllv $1, $6, $8 -; MIPS32R2-NEXT: srlv $6, $24, $11 -; MIPS32R2-NEXT: or $1, $1, $6 -; MIPS32R2-NEXT: sllv $6, $7, $8 -; MIPS32R2-NEXT: movn $1, $6, $13 -; MIPS32R2-NEXT: movz $2, $4, $8 -; MIPS32R2-NEXT: movz $1, $zero, $25 -; MIPS32R2-NEXT: movn $15, $9, $10 -; MIPS32R2-NEXT: movn $12, $zero, $13 -; MIPS32R2-NEXT: or $4, $12, $15 -; MIPS32R2-NEXT: movn $3, $zero, $14 -; MIPS32R2-NEXT: movn $3, $4, $25 -; MIPS32R2-NEXT: movz $3, $5, $8 -; MIPS32R2-NEXT: movn $6, $zero, $13 -; MIPS32R2-NEXT: movz $6, $zero, $25 -; MIPS32R2-NEXT: move $4, $1 +; MIPS32R2-NEXT: srlv $4, $12, $10 +; MIPS32R2-NEXT: or $4, $1, $4 +; MIPS32R2-NEXT: movn $4, $7, $9 ; MIPS32R2-NEXT: jr $ra -; MIPS32R2-NEXT: move $5, $6 +; MIPS32R2-NEXT: movz $4, $zero, $24 ; ; MIPS32R6-LABEL: shl_i128: ; MIPS32R6: # %bb.0: # %entry -; MIPS32R6-NEXT: lw $3, 28($sp) -; MIPS32R6-NEXT: sllv $1, $4, $3 -; MIPS32R6-NEXT: not $2, $3 -; MIPS32R6-NEXT: srl $8, $5, 1 -; MIPS32R6-NEXT: srlv $8, $8, $2 -; MIPS32R6-NEXT: or $1, $1, $8 -; MIPS32R6-NEXT: sllv $8, $5, $3 -; MIPS32R6-NEXT: andi $9, $3, 32 -; MIPS32R6-NEXT: seleqz $1, $1, $9 -; MIPS32R6-NEXT: selnez $10, $8, $9 -; MIPS32R6-NEXT: addiu $11, $zero, 64 -; MIPS32R6-NEXT: subu $11, $11, $3 -; MIPS32R6-NEXT: srlv $12, $6, 
$11 -; MIPS32R6-NEXT: andi $13, $11, 32 -; MIPS32R6-NEXT: seleqz $14, $12, $13 -; MIPS32R6-NEXT: or $1, $10, $1 -; MIPS32R6-NEXT: selnez $10, $12, $13 -; MIPS32R6-NEXT: srlv $12, $7, $11 -; MIPS32R6-NEXT: not $11, $11 -; MIPS32R6-NEXT: sll $15, $6, 1 -; MIPS32R6-NEXT: sllv $11, $15, $11 -; MIPS32R6-NEXT: or $11, $11, $12 -; MIPS32R6-NEXT: seleqz $11, $11, $13 -; MIPS32R6-NEXT: addiu $12, $3, -64 -; MIPS32R6-NEXT: or $10, $10, $11 -; MIPS32R6-NEXT: or $1, $1, $14 -; MIPS32R6-NEXT: sllv $11, $6, $12 -; MIPS32R6-NEXT: srl $13, $7, 1 -; MIPS32R6-NEXT: not $14, $12 -; MIPS32R6-NEXT: srlv $14, $13, $14 +; MIPS32R6-NEXT: addiu $sp, $sp, -8 +; MIPS32R6-NEXT: .cfi_def_cfa_offset 8 +; MIPS32R6-NEXT: sw $16, 4($sp) # 4-byte Folded Spill +; MIPS32R6-NEXT: .cfi_offset 16, -4 +; MIPS32R6-NEXT: lw $3, 36($sp) +; MIPS32R6-NEXT: addiu $1, $zero, 64 +; MIPS32R6-NEXT: subu $1, $1, $3 +; MIPS32R6-NEXT: andi $2, $1, 63 +; MIPS32R6-NEXT: sllv $8, $6, $2 +; MIPS32R6-NEXT: not $9, $2 +; MIPS32R6-NEXT: srl $10, $7, 1 +; MIPS32R6-NEXT: srlv $11, $10, $9 +; MIPS32R6-NEXT: sllv $12, $7, $2 +; MIPS32R6-NEXT: andi $1, $1, 32 +; MIPS32R6-NEXT: selnez $13, $12, $1 +; MIPS32R6-NEXT: or $8, $8, $11 +; MIPS32R6-NEXT: seleqz $8, $8, $1 +; MIPS32R6-NEXT: sllv $11, $4, $2 +; MIPS32R6-NEXT: srl $14, $5, 1 +; MIPS32R6-NEXT: srlv $14, $14, $9 +; MIPS32R6-NEXT: or $8, $13, $8 ; MIPS32R6-NEXT: or $11, $11, $14 -; MIPS32R6-NEXT: andi $14, $12, 32 -; MIPS32R6-NEXT: seleqz $11, $11, $14 -; MIPS32R6-NEXT: sllv $12, $7, $12 -; MIPS32R6-NEXT: selnez $15, $12, $14 +; MIPS32R6-NEXT: srlv $13, $7, $2 +; MIPS32R6-NEXT: sll $14, $6, 1 +; MIPS32R6-NEXT: sllv $9, $14, $9 +; MIPS32R6-NEXT: seleqz $11, $11, $1 +; MIPS32R6-NEXT: or $9, $9, $13 +; MIPS32R6-NEXT: srlv $13, $6, $2 +; MIPS32R6-NEXT: addiu $14, $3, -64 +; MIPS32R6-NEXT: andi $15, $14, 63 ; MIPS32R6-NEXT: sltiu $24, $3, 64 -; MIPS32R6-NEXT: selnez $1, $1, $24 -; MIPS32R6-NEXT: or $11, $15, $11 -; MIPS32R6-NEXT: sllv $6, $6, $3 -; MIPS32R6-NEXT: srlv $2, $13, $2 
-; MIPS32R6-NEXT: seleqz $8, $8, $9 -; MIPS32R6-NEXT: or $8, $8, $10 -; MIPS32R6-NEXT: or $6, $6, $2 -; MIPS32R6-NEXT: seleqz $2, $11, $24 +; MIPS32R6-NEXT: selnez $8, $8, $24 +; MIPS32R6-NEXT: selnez $25, $13, $1 +; MIPS32R6-NEXT: seleqz $9, $9, $1 +; MIPS32R6-NEXT: sllv $2, $5, $2 +; MIPS32R6-NEXT: selnez $gp, $2, $1 +; MIPS32R6-NEXT: seleqz $12, $12, $1 +; MIPS32R6-NEXT: not $16, $15 +; MIPS32R6-NEXT: or $11, $gp, $11 +; MIPS32R6-NEXT: seleqz $13, $13, $1 +; MIPS32R6-NEXT: or $9, $25, $9 +; MIPS32R6-NEXT: seleqz $1, $2, $1 +; MIPS32R6-NEXT: sllv $2, $6, $15 +; MIPS32R6-NEXT: srlv $6, $10, $16 ; MIPS32R6-NEXT: seleqz $10, $zero, $24 -; MIPS32R6-NEXT: sllv $7, $7, $3 -; MIPS32R6-NEXT: seleqz $11, $7, $9 -; MIPS32R6-NEXT: selnez $11, $11, $24 -; MIPS32R6-NEXT: seleqz $4, $4, $3 -; MIPS32R6-NEXT: or $1, $1, $2 -; MIPS32R6-NEXT: selnez $1, $1, $3 -; MIPS32R6-NEXT: or $2, $4, $1 -; MIPS32R6-NEXT: or $1, $10, $11 -; MIPS32R6-NEXT: seleqz $4, $6, $9 -; MIPS32R6-NEXT: selnez $6, $7, $9 +; MIPS32R6-NEXT: or $9, $1, $9 +; MIPS32R6-NEXT: or $1, $10, $8 +; MIPS32R6-NEXT: selnez $8, $12, $24 ; MIPS32R6-NEXT: seleqz $5, $5, $3 -; MIPS32R6-NEXT: selnez $7, $8, $24 -; MIPS32R6-NEXT: seleqz $8, $12, $14 -; MIPS32R6-NEXT: seleqz $8, $8, $24 -; MIPS32R6-NEXT: or $7, $7, $8 -; MIPS32R6-NEXT: selnez $3, $7, $3 +; MIPS32R6-NEXT: seleqz $4, $4, $3 +; MIPS32R6-NEXT: or $11, $11, $13 +; MIPS32R6-NEXT: selnez $11, $11, $24 +; MIPS32R6-NEXT: or $2, $2, $6 +; MIPS32R6-NEXT: andi $6, $14, 32 +; MIPS32R6-NEXT: seleqz $2, $2, $6 +; MIPS32R6-NEXT: sllv $7, $7, $15 +; MIPS32R6-NEXT: selnez $12, $7, $6 +; MIPS32R6-NEXT: or $2, $12, $2 +; MIPS32R6-NEXT: seleqz $2, $2, $24 +; MIPS32R6-NEXT: or $2, $11, $2 +; MIPS32R6-NEXT: selnez $2, $2, $3 +; MIPS32R6-NEXT: or $2, $4, $2 +; MIPS32R6-NEXT: selnez $4, $9, $24 +; MIPS32R6-NEXT: seleqz $6, $7, $6 +; MIPS32R6-NEXT: seleqz $6, $6, $24 +; MIPS32R6-NEXT: or $4, $4, $6 +; MIPS32R6-NEXT: selnez $3, $4, $3 ; MIPS32R6-NEXT: or $3, $5, $3 -; MIPS32R6-NEXT: or 
$4, $6, $4 -; MIPS32R6-NEXT: selnez $4, $4, $24 -; MIPS32R6-NEXT: or $4, $10, $4 +; MIPS32R6-NEXT: or $5, $10, $8 +; MIPS32R6-NEXT: move $4, $1 +; MIPS32R6-NEXT: lw $16, 4($sp) # 4-byte Folded Reload ; MIPS32R6-NEXT: jr $ra -; MIPS32R6-NEXT: move $5, $1 +; MIPS32R6-NEXT: addiu $sp, $sp, 8 ; ; MIPS3-LABEL: shl_i128: ; MIPS3: # %bb.0: # %entry @@ -849,81 +854,83 @@ ; MMR3-NEXT: swp $16, 32($sp) ; MMR3-NEXT: .cfi_offset 17, -4 ; MMR3-NEXT: .cfi_offset 16, -8 -; MMR3-NEXT: move $17, $7 -; MMR3-NEXT: sw $7, 4($sp) # 4-byte Folded Spill -; MMR3-NEXT: move $7, $6 +; MMR3-NEXT: sw $7, 24($sp) # 4-byte Folded Spill +; MMR3-NEXT: sw $6, 28($sp) # 4-byte Folded Spill +; MMR3-NEXT: sw $5, 12($sp) # 4-byte Folded Spill ; MMR3-NEXT: move $1, $4 -; MMR3-NEXT: lw $16, 68($sp) -; MMR3-NEXT: li16 $2, 64 -; MMR3-NEXT: subu16 $6, $2, $16 -; MMR3-NEXT: srlv $9, $7, $6 -; MMR3-NEXT: andi16 $4, $6, 32 -; MMR3-NEXT: sw $4, 24($sp) # 4-byte Folded Spill -; MMR3-NEXT: li16 $3, 0 -; MMR3-NEXT: move $2, $9 -; MMR3-NEXT: movn $2, $3, $4 -; MMR3-NEXT: sllv $3, $1, $16 +; MMR3-NEXT: lw $3, 68($sp) ; MMR3-NEXT: sw $3, 16($sp) # 4-byte Folded Spill -; MMR3-NEXT: not16 $4, $16 -; MMR3-NEXT: sw $4, 20($sp) # 4-byte Folded Spill -; MMR3-NEXT: sw $5, 28($sp) # 4-byte Folded Spill -; MMR3-NEXT: srl16 $3, $5, 1 -; MMR3-NEXT: srlv $3, $3, $4 -; MMR3-NEXT: lw $4, 16($sp) # 4-byte Folded Reload -; MMR3-NEXT: or16 $3, $4 -; MMR3-NEXT: sllv $5, $5, $16 -; MMR3-NEXT: sw $5, 8($sp) # 4-byte Folded Spill -; MMR3-NEXT: andi16 $4, $16, 32 -; MMR3-NEXT: sw $4, 16($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $3, $5, $4 -; MMR3-NEXT: addiu $4, $16, -64 -; MMR3-NEXT: or16 $3, $2 -; MMR3-NEXT: sllv $2, $7, $4 -; MMR3-NEXT: sw $2, 12($sp) # 4-byte Folded Spill -; MMR3-NEXT: srl16 $5, $17, 1 -; MMR3-NEXT: not16 $2, $4 -; MMR3-NEXT: srlv $2, $5, $2 -; MMR3-NEXT: lw $17, 12($sp) # 4-byte Folded Reload -; MMR3-NEXT: or16 $2, $17 -; MMR3-NEXT: lw $17, 4($sp) # 4-byte Folded Reload -; MMR3-NEXT: sllv $8, $17, $4 -; 
MMR3-NEXT: andi16 $4, $4, 32 -; MMR3-NEXT: sw $4, 12($sp) # 4-byte Folded Spill -; MMR3-NEXT: movn $2, $8, $4 -; MMR3-NEXT: sltiu $10, $16, 64 -; MMR3-NEXT: movn $2, $3, $10 -; MMR3-NEXT: srlv $4, $17, $6 -; MMR3-NEXT: not16 $3, $6 -; MMR3-NEXT: sll16 $6, $7, 1 -; MMR3-NEXT: sllv $3, $6, $3 -; MMR3-NEXT: or16 $3, $4 -; MMR3-NEXT: sllv $6, $7, $16 -; MMR3-NEXT: lw $4, 20($sp) # 4-byte Folded Reload -; MMR3-NEXT: srlv $4, $5, $4 -; MMR3-NEXT: or16 $4, $6 -; MMR3-NEXT: sllv $6, $17, $16 -; MMR3-NEXT: lw $17, 16($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $4, $6, $17 -; MMR3-NEXT: movz $2, $1, $16 +; MMR3-NEXT: li16 $2, 64 +; MMR3-NEXT: subu16 $2, $2, $3 +; MMR3-NEXT: andi16 $4, $2, 63 +; MMR3-NEXT: sllv $3, $5, $4 +; MMR3-NEXT: andi16 $2, $2, 32 +; MMR3-NEXT: sw $2, 20($sp) # 4-byte Folded Spill +; MMR3-NEXT: sllv $6, $1, $4 +; MMR3-NEXT: not16 $7, $4 +; MMR3-NEXT: move $17, $4 +; MMR3-NEXT: sw $4, 4($sp) # 4-byte Folded Spill +; MMR3-NEXT: srl16 $2, $5, 1 +; MMR3-NEXT: srlv $16, $2, $7 +; MMR3-NEXT: move $4, $7 +; MMR3-NEXT: sw $7, 8($sp) # 4-byte Folded Spill ; MMR3-NEXT: li16 $5, 0 -; MMR3-NEXT: movz $4, $5, $10 -; MMR3-NEXT: lw $7, 24($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $3, $9, $7 -; MMR3-NEXT: lw $5, 8($sp) # 4-byte Folded Reload -; MMR3-NEXT: li16 $7, 0 -; MMR3-NEXT: movn $5, $7, $17 -; MMR3-NEXT: or16 $5, $3 -; MMR3-NEXT: lw $3, 12($sp) # 4-byte Folded Reload -; MMR3-NEXT: movn $8, $7, $3 -; MMR3-NEXT: li16 $7, 0 -; MMR3-NEXT: movn $8, $5, $10 -; MMR3-NEXT: lw $3, 28($sp) # 4-byte Folded Reload -; MMR3-NEXT: movz $8, $3, $16 -; MMR3-NEXT: movn $6, $7, $17 +; MMR3-NEXT: or16 $16, $6 +; MMR3-NEXT: lw $2, 20($sp) # 4-byte Folded Reload +; MMR3-NEXT: movn $16, $3, $2 +; MMR3-NEXT: movn $3, $5, $2 +; MMR3-NEXT: lw $5, 24($sp) # 4-byte Folded Reload +; MMR3-NEXT: srlv $7, $5, $17 +; MMR3-NEXT: lw $5, 28($sp) # 4-byte Folded Reload +; MMR3-NEXT: sll16 $6, $5, 1 +; MMR3-NEXT: sllv $6, $6, $4 +; MMR3-NEXT: or16 $6, $7 +; MMR3-NEXT: srlv $4, $5, $17 +; 
MMR3-NEXT: movn $6, $4, $2 +; MMR3-NEXT: or16 $6, $3 ; MMR3-NEXT: li16 $3, 0 -; MMR3-NEXT: movz $6, $3, $10 +; MMR3-NEXT: movn $4, $3, $2 +; MMR3-NEXT: lw $2, 16($sp) # 4-byte Folded Reload +; MMR3-NEXT: addiu $3, $2, -64 +; MMR3-NEXT: andi16 $7, $3, 63 +; MMR3-NEXT: lw $5, 24($sp) # 4-byte Folded Reload +; MMR3-NEXT: sllv $8, $5, $7 +; MMR3-NEXT: andi16 $5, $3, 32 ; MMR3-NEXT: move $3, $8 -; MMR3-NEXT: move $5, $6 +; MMR3-NEXT: li16 $17, 0 +; MMR3-NEXT: movn $3, $17, $5 +; MMR3-NEXT: sltiu $9, $2, 64 +; MMR3-NEXT: movn $3, $6, $9 +; MMR3-NEXT: or16 $4, $16 +; MMR3-NEXT: lw $2, 28($sp) # 4-byte Folded Reload +; MMR3-NEXT: sllv $17, $2, $7 +; MMR3-NEXT: lw $2, 24($sp) # 4-byte Folded Reload +; MMR3-NEXT: srl16 $16, $2, 1 +; MMR3-NEXT: move $6, $2 +; MMR3-NEXT: not16 $2, $7 +; MMR3-NEXT: srlv $2, $16, $2 +; MMR3-NEXT: or16 $2, $17 +; MMR3-NEXT: movn $2, $8, $5 +; MMR3-NEXT: movn $2, $4, $9 +; MMR3-NEXT: lw $17, 4($sp) # 4-byte Folded Reload +; MMR3-NEXT: sllv $8, $6, $17 +; MMR3-NEXT: lw $4, 16($sp) # 4-byte Folded Reload +; MMR3-NEXT: movz $2, $1, $4 +; MMR3-NEXT: lw $5, 12($sp) # 4-byte Folded Reload +; MMR3-NEXT: movz $3, $5, $4 +; MMR3-NEXT: move $5, $8 +; MMR3-NEXT: li16 $4, 0 +; MMR3-NEXT: lw $6, 20($sp) # 4-byte Folded Reload +; MMR3-NEXT: movn $5, $4, $6 +; MMR3-NEXT: movz $5, $4, $9 +; MMR3-NEXT: lw $4, 28($sp) # 4-byte Folded Reload +; MMR3-NEXT: sllv $7, $4, $17 +; MMR3-NEXT: lw $4, 8($sp) # 4-byte Folded Reload +; MMR3-NEXT: srlv $4, $16, $4 +; MMR3-NEXT: or16 $4, $7 +; MMR3-NEXT: movn $4, $8, $6 +; MMR3-NEXT: li16 $6, 0 +; MMR3-NEXT: movz $4, $6, $9 ; MMR3-NEXT: lwp $16, 32($sp) ; MMR3-NEXT: addiusp 40 ; MMR3-NEXT: jrc $ra @@ -936,76 +943,76 @@ ; MMR6-NEXT: sw $16, 8($sp) # 4-byte Folded Spill ; MMR6-NEXT: .cfi_offset 17, -4 ; MMR6-NEXT: .cfi_offset 16, -8 -; MMR6-NEXT: move $11, $4 +; MMR6-NEXT: move $8, $4 ; MMR6-NEXT: lw $3, 44($sp) -; MMR6-NEXT: sllv $1, $4, $3 -; MMR6-NEXT: not16 $2, $3 -; MMR6-NEXT: sw $2, 4($sp) # 4-byte Folded Spill -; MMR6-NEXT: 
srl16 $16, $5, 1 -; MMR6-NEXT: srlv $8, $16, $2 -; MMR6-NEXT: or $1, $1, $8 -; MMR6-NEXT: sllv $8, $5, $3 -; MMR6-NEXT: andi16 $16, $3, 32 +; MMR6-NEXT: li16 $2, 64 +; MMR6-NEXT: subu16 $2, $2, $3 +; MMR6-NEXT: andi16 $17, $2, 63 +; MMR6-NEXT: sllv $1, $6, $17 +; MMR6-NEXT: not16 $4, $17 +; MMR6-NEXT: srl16 $16, $7, 1 +; MMR6-NEXT: sw $16, 4($sp) # 4-byte Folded Spill +; MMR6-NEXT: srlv $9, $16, $4 +; MMR6-NEXT: sllv $10, $7, $17 +; MMR6-NEXT: andi16 $16, $2, 32 +; MMR6-NEXT: selnez $11, $10, $16 +; MMR6-NEXT: or $1, $1, $9 ; MMR6-NEXT: seleqz $1, $1, $16 -; MMR6-NEXT: selnez $9, $8, $16 -; MMR6-NEXT: li16 $17, 64 -; MMR6-NEXT: subu16 $17, $17, $3 -; MMR6-NEXT: srlv $10, $6, $17 -; MMR6-NEXT: andi16 $2, $17, 32 -; MMR6-NEXT: seleqz $12, $10, $2 -; MMR6-NEXT: or $1, $9, $1 -; MMR6-NEXT: selnez $9, $10, $2 -; MMR6-NEXT: srlv $10, $7, $17 -; MMR6-NEXT: not16 $17, $17 -; MMR6-NEXT: sll16 $4, $6, 1 -; MMR6-NEXT: sllv $4, $4, $17 -; MMR6-NEXT: or $4, $4, $10 -; MMR6-NEXT: seleqz $2, $4, $2 +; MMR6-NEXT: sllv $9, $8, $17 +; MMR6-NEXT: srl16 $2, $5, 1 +; MMR6-NEXT: srlv $2, $2, $4 +; MMR6-NEXT: or $1, $11, $1 +; MMR6-NEXT: or $11, $9, $2 +; MMR6-NEXT: srlv $9, $7, $17 +; MMR6-NEXT: sll16 $2, $6, 1 +; MMR6-NEXT: sllv $2, $2, $4 +; MMR6-NEXT: seleqz $11, $11, $16 +; MMR6-NEXT: or $9, $2, $9 +; MMR6-NEXT: srlv $12, $6, $17 ; MMR6-NEXT: addiu $4, $3, -64 -; MMR6-NEXT: or $10, $9, $2 -; MMR6-NEXT: or $1, $1, $12 -; MMR6-NEXT: sllv $9, $6, $4 -; MMR6-NEXT: srl16 $2, $7, 1 -; MMR6-NEXT: not16 $17, $4 -; MMR6-NEXT: srlv $12, $2, $17 -; MMR6-NEXT: or $9, $9, $12 -; MMR6-NEXT: andi16 $17, $4, 32 -; MMR6-NEXT: seleqz $9, $9, $17 -; MMR6-NEXT: sllv $14, $7, $4 -; MMR6-NEXT: selnez $12, $14, $17 +; MMR6-NEXT: andi16 $2, $4, 63 ; MMR6-NEXT: sltiu $13, $3, 64 ; MMR6-NEXT: selnez $1, $1, $13 -; MMR6-NEXT: or $9, $12, $9 -; MMR6-NEXT: sllv $6, $6, $3 -; MMR6-NEXT: lw $4, 4($sp) # 4-byte Folded Reload -; MMR6-NEXT: srlv $2, $2, $4 -; MMR6-NEXT: seleqz $8, $8, $16 -; MMR6-NEXT: li16 $4, 0 -; 
MMR6-NEXT: or $8, $8, $10 -; MMR6-NEXT: or $6, $6, $2 -; MMR6-NEXT: seleqz $2, $9, $13 -; MMR6-NEXT: seleqz $9, $4, $13 -; MMR6-NEXT: sllv $7, $7, $3 -; MMR6-NEXT: seleqz $10, $7, $16 +; MMR6-NEXT: selnez $14, $12, $16 +; MMR6-NEXT: seleqz $9, $9, $16 +; MMR6-NEXT: sllv $15, $5, $17 +; MMR6-NEXT: selnez $24, $15, $16 +; MMR6-NEXT: seleqz $10, $10, $16 +; MMR6-NEXT: not16 $17, $2 +; MMR6-NEXT: or $11, $24, $11 +; MMR6-NEXT: seleqz $12, $12, $16 +; MMR6-NEXT: or $9, $14, $9 +; MMR6-NEXT: seleqz $14, $15, $16 +; MMR6-NEXT: sllv $6, $6, $2 +; MMR6-NEXT: lw $16, 4($sp) # 4-byte Folded Reload +; MMR6-NEXT: srlv $15, $16, $17 +; MMR6-NEXT: li16 $16, 0 +; MMR6-NEXT: seleqz $24, $16, $13 +; MMR6-NEXT: or $9, $14, $9 +; MMR6-NEXT: or $1, $24, $1 ; MMR6-NEXT: selnez $10, $10, $13 -; MMR6-NEXT: seleqz $11, $11, $3 -; MMR6-NEXT: or $1, $1, $2 -; MMR6-NEXT: selnez $1, $1, $3 -; MMR6-NEXT: or $2, $11, $1 -; MMR6-NEXT: or $1, $9, $10 -; MMR6-NEXT: seleqz $6, $6, $16 -; MMR6-NEXT: selnez $7, $7, $16 ; MMR6-NEXT: seleqz $5, $5, $3 -; MMR6-NEXT: selnez $8, $8, $13 -; MMR6-NEXT: seleqz $4, $14, $17 +; MMR6-NEXT: seleqz $8, $8, $3 +; MMR6-NEXT: or $11, $11, $12 +; MMR6-NEXT: selnez $11, $11, $13 +; MMR6-NEXT: or $6, $6, $15 +; MMR6-NEXT: andi16 $4, $4, 32 +; MMR6-NEXT: seleqz $6, $6, $4 +; MMR6-NEXT: sllv $7, $7, $2 +; MMR6-NEXT: selnez $2, $7, $4 +; MMR6-NEXT: or $2, $2, $6 +; MMR6-NEXT: seleqz $2, $2, $13 +; MMR6-NEXT: or $2, $11, $2 +; MMR6-NEXT: selnez $2, $2, $3 +; MMR6-NEXT: or $2, $8, $2 +; MMR6-NEXT: selnez $6, $9, $13 +; MMR6-NEXT: seleqz $4, $7, $4 ; MMR6-NEXT: seleqz $4, $4, $13 -; MMR6-NEXT: or $4, $8, $4 +; MMR6-NEXT: or $4, $6, $4 ; MMR6-NEXT: selnez $3, $4, $3 ; MMR6-NEXT: or $3, $5, $3 -; MMR6-NEXT: or $4, $7, $6 -; MMR6-NEXT: selnez $4, $4, $13 -; MMR6-NEXT: or $4, $9, $4 -; MMR6-NEXT: move $5, $1 +; MMR6-NEXT: or $5, $24, $10 +; MMR6-NEXT: move $4, $1 ; MMR6-NEXT: lw $16, 8($sp) # 4-byte Folded Reload ; MMR6-NEXT: lw $17, 12($sp) # 4-byte Folded Reload ; MMR6-NEXT: 
addiu $sp, $sp, 16 diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll --- a/llvm/test/CodeGen/RISCV/shifts.ll +++ b/llvm/test/CodeGen/RISCV/shifts.ll @@ -145,115 +145,117 @@ define i128 @lshr128(i128 %a, i128 %b) nounwind { ; RV32I-LABEL: lshr128: ; RV32I: # %bb.0: -; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: li a7, 32 -; RV32I-NEXT: sub t1, a7, a2 -; RV32I-NEXT: sll t0, a5, a6 -; RV32I-NEXT: bltz t1, .LBB6_2 +; RV32I-NEXT: addi sp, sp, -16 +; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill +; RV32I-NEXT: lw a4, 0(a2) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: li a5, 64 +; RV32I-NEXT: sub a6, a5, a4 +; RV32I-NEXT: andi t0, a6, 63 +; RV32I-NEXT: addi a7, t0, -32 +; RV32I-NEXT: xori t0, t0, 31 +; RV32I-NEXT: bltz a7, .LBB6_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv t2, t0 +; RV32I-NEXT: srl t5, a3, a7 ; RV32I-NEXT: j .LBB6_3 ; RV32I-NEXT: .LBB6_2: -; RV32I-NEXT: sll a6, a4, a6 -; RV32I-NEXT: sub a7, a3, a2 -; RV32I-NEXT: xori a7, a7, 31 -; RV32I-NEXT: srli t2, a5, 1 -; RV32I-NEXT: srl a7, t2, a7 -; RV32I-NEXT: or t2, a6, a7 +; RV32I-NEXT: srl t1, a2, a6 +; RV32I-NEXT: slli t2, a3, 1 +; RV32I-NEXT: sll t2, t2, t0 +; RV32I-NEXT: or t5, t1, t2 ; RV32I-NEXT: .LBB6_3: -; RV32I-NEXT: lw t5, 4(a1) -; RV32I-NEXT: addi a6, a2, -32 -; RV32I-NEXT: bgez a6, .LBB6_5 +; RV32I-NEXT: lw t1, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: bgez a7, .LBB6_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a7, t5, a2 -; RV32I-NEXT: or t2, t2, a7 +; RV32I-NEXT: sll t2, t1, a6 +; RV32I-NEXT: or t5, t5, t2 ; RV32I-NEXT: .LBB6_5: -; RV32I-NEXT: addi t4, a2, -96 -; RV32I-NEXT: addi t3, a2, -64 +; RV32I-NEXT: addi t3, a4, -64 +; RV32I-NEXT: andi t6, t3, 63 +; RV32I-NEXT: addi t4, t6, -32 +; RV32I-NEXT: slli t2, a1, 1 ; RV32I-NEXT: bltz t4, .LBB6_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: bgeu a2, a3, .LBB6_8 +; 
RV32I-NEXT: srl t6, a1, t4 +; RV32I-NEXT: bgeu a4, a5, .LBB6_8 ; RV32I-NEXT: j .LBB6_9 ; RV32I-NEXT: .LBB6_7: -; RV32I-NEXT: srl a7, a4, t3 -; RV32I-NEXT: bltu a2, a3, .LBB6_9 +; RV32I-NEXT: srl s0, t1, t3 +; RV32I-NEXT: xori t6, t6, 31 +; RV32I-NEXT: sll t6, t2, t6 +; RV32I-NEXT: or t6, s0, t6 +; RV32I-NEXT: bltu a4, a5, .LBB6_9 ; RV32I-NEXT: .LBB6_8: -; RV32I-NEXT: mv t2, a7 +; RV32I-NEXT: mv t5, t6 ; RV32I-NEXT: .LBB6_9: -; RV32I-NEXT: mv a7, t5 -; RV32I-NEXT: beqz a2, .LBB6_11 +; RV32I-NEXT: bnez a4, .LBB6_12 ; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv a7, t2 +; RV32I-NEXT: bltz a7, .LBB6_13 ; RV32I-NEXT: .LBB6_11: -; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: xori t2, a2, 31 -; RV32I-NEXT: bltz a6, .LBB6_13 -; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: srl t5, t5, a6 -; RV32I-NEXT: bltz t1, .LBB6_14 +; RV32I-NEXT: sll t5, t1, a7 +; RV32I-NEXT: bltz a7, .LBB6_14 ; RV32I-NEXT: j .LBB6_15 +; RV32I-NEXT: .LBB6_12: +; RV32I-NEXT: mv a2, t5 +; RV32I-NEXT: bgez a7, .LBB6_11 ; RV32I-NEXT: .LBB6_13: -; RV32I-NEXT: srl t6, a1, a2 -; RV32I-NEXT: slli t5, t5, 1 -; RV32I-NEXT: sll t5, t5, t2 -; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: bgez t1, .LBB6_15 +; RV32I-NEXT: sll t5, a1, a6 +; RV32I-NEXT: srli t6, t1, 1 +; RV32I-NEXT: srl t6, t6, t0 +; RV32I-NEXT: or t5, t5, t6 +; RV32I-NEXT: bgez a7, .LBB6_15 ; RV32I-NEXT: .LBB6_14: -; RV32I-NEXT: or t5, t5, t0 +; RV32I-NEXT: srl t6, a3, a6 +; RV32I-NEXT: or t5, t5, t6 ; RV32I-NEXT: .LBB6_15: -; RV32I-NEXT: slli t0, a4, 1 ; RV32I-NEXT: bltz t4, .LBB6_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: srl t1, a4, t4 -; RV32I-NEXT: bgeu a2, a3, .LBB6_18 +; RV32I-NEXT: li t3, 0 +; RV32I-NEXT: bgeu a4, a5, .LBB6_18 ; RV32I-NEXT: j .LBB6_19 ; RV32I-NEXT: .LBB6_17: -; RV32I-NEXT: srl t1, a5, t3 -; RV32I-NEXT: xori t3, t3, 31 -; RV32I-NEXT: sll t3, t0, t3 -; RV32I-NEXT: or t1, t1, t3 -; RV32I-NEXT: bltu a2, a3, .LBB6_19 +; RV32I-NEXT: srl t3, a1, t3 +; RV32I-NEXT: bltu a4, a5, .LBB6_19 ; RV32I-NEXT: .LBB6_18: -; RV32I-NEXT: mv t5, t1 +; RV32I-NEXT: mv t5, 
t3 ; RV32I-NEXT: .LBB6_19: -; RV32I-NEXT: bnez a2, .LBB6_22 +; RV32I-NEXT: bnez a4, .LBB6_22 ; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: bltz a6, .LBB6_23 +; RV32I-NEXT: bltz a7, .LBB6_23 ; RV32I-NEXT: .LBB6_21: -; RV32I-NEXT: srl a5, a4, a6 -; RV32I-NEXT: bgeu a2, a3, .LBB6_24 +; RV32I-NEXT: srl t0, a1, a7 +; RV32I-NEXT: bgeu a4, a5, .LBB6_24 ; RV32I-NEXT: j .LBB6_25 ; RV32I-NEXT: .LBB6_22: -; RV32I-NEXT: mv a1, t5 -; RV32I-NEXT: bgez a6, .LBB6_21 +; RV32I-NEXT: mv a3, t5 +; RV32I-NEXT: bgez a7, .LBB6_21 ; RV32I-NEXT: .LBB6_23: -; RV32I-NEXT: srl a5, a5, a2 -; RV32I-NEXT: sll t0, t0, t2 -; RV32I-NEXT: or a5, a5, t0 -; RV32I-NEXT: bltu a2, a3, .LBB6_25 +; RV32I-NEXT: srl t1, t1, a6 +; RV32I-NEXT: sll t0, t2, t0 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: bltu a4, a5, .LBB6_25 ; RV32I-NEXT: .LBB6_24: -; RV32I-NEXT: li a5, 0 +; RV32I-NEXT: li t0, 0 ; RV32I-NEXT: .LBB6_25: -; RV32I-NEXT: bltz a6, .LBB6_27 +; RV32I-NEXT: bltz a7, .LBB6_27 ; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bgeu a2, a3, .LBB6_28 +; RV32I-NEXT: li a1, 0 +; RV32I-NEXT: bgeu a4, a5, .LBB6_28 ; RV32I-NEXT: j .LBB6_29 ; RV32I-NEXT: .LBB6_27: -; RV32I-NEXT: srl a4, a4, a2 -; RV32I-NEXT: bltu a2, a3, .LBB6_29 +; RV32I-NEXT: srl a1, a1, a6 +; RV32I-NEXT: bltu a4, a5, .LBB6_29 ; RV32I-NEXT: .LBB6_28: -; RV32I-NEXT: li a4, 0 +; RV32I-NEXT: li a1, 0 ; RV32I-NEXT: .LBB6_29: -; RV32I-NEXT: sw a4, 12(a0) -; RV32I-NEXT: sw a5, 8(a0) -; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw a7, 4(a0) +; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sw t0, 8(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sw a2, 0(a0) +; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload +; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret ; ; RV64I-LABEL: lshr128: @@ -281,116 +283,114 @@ ; RV32I: # %bb.0: ; RV32I-NEXT: addi sp, sp, -16 ; RV32I-NEXT: sw s0, 12(sp) # 4-byte Folded Spill -; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw a5, 8(a1) -; RV32I-NEXT: lw a4, 12(a1) -; RV32I-NEXT: neg a6, a2 -; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: li 
a7, 32 -; RV32I-NEXT: sub t2, a7, a2 -; RV32I-NEXT: sll t1, a5, a6 -; RV32I-NEXT: bltz t2, .LBB7_2 +; RV32I-NEXT: lw a4, 0(a2) +; RV32I-NEXT: lw a3, 4(a1) +; RV32I-NEXT: lw a2, 0(a1) +; RV32I-NEXT: li a5, 64 +; RV32I-NEXT: sub a6, a5, a4 +; RV32I-NEXT: andi t0, a6, 63 +; RV32I-NEXT: addi a7, t0, -32 +; RV32I-NEXT: xori t0, t0, 31 +; RV32I-NEXT: bltz a7, .LBB7_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv t4, t1 +; RV32I-NEXT: srl t4, a3, a7 ; RV32I-NEXT: j .LBB7_3 ; RV32I-NEXT: .LBB7_2: -; RV32I-NEXT: sll a6, a4, a6 -; RV32I-NEXT: sub a7, a3, a2 -; RV32I-NEXT: xori a7, a7, 31 -; RV32I-NEXT: srli t0, a5, 1 -; RV32I-NEXT: srl a7, t0, a7 -; RV32I-NEXT: or t4, a6, a7 +; RV32I-NEXT: srl t1, a2, a6 +; RV32I-NEXT: slli t2, a3, 1 +; RV32I-NEXT: sll t2, t2, t0 +; RV32I-NEXT: or t4, t1, t2 ; RV32I-NEXT: .LBB7_3: -; RV32I-NEXT: lw t6, 4(a1) -; RV32I-NEXT: addi a6, a2, -32 -; RV32I-NEXT: bgez a6, .LBB7_5 +; RV32I-NEXT: lw t1, 8(a1) +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: bgez a7, .LBB7_5 ; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: srl a7, t6, a2 -; RV32I-NEXT: or t4, t4, a7 +; RV32I-NEXT: sll t2, t1, a6 +; RV32I-NEXT: or t4, t4, t2 ; RV32I-NEXT: .LBB7_5: -; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: addi t5, a2, -96 -; RV32I-NEXT: srai a7, a4, 31 +; RV32I-NEXT: addi t3, a4, -64 +; RV32I-NEXT: andi t6, t3, 63 +; RV32I-NEXT: addi t5, t6, -32 +; RV32I-NEXT: slli t2, a1, 1 ; RV32I-NEXT: bltz t5, .LBB7_7 ; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: mv t0, a7 -; RV32I-NEXT: bgeu a2, a3, .LBB7_8 +; RV32I-NEXT: sra t6, a1, t5 +; RV32I-NEXT: bgeu a4, a5, .LBB7_8 ; RV32I-NEXT: j .LBB7_9 ; RV32I-NEXT: .LBB7_7: -; RV32I-NEXT: sra t0, a4, t3 -; RV32I-NEXT: bltu a2, a3, .LBB7_9 +; RV32I-NEXT: srl s0, t1, t3 +; RV32I-NEXT: xori t6, t6, 31 +; RV32I-NEXT: sll t6, t2, t6 +; RV32I-NEXT: or t6, s0, t6 +; RV32I-NEXT: bltu a4, a5, .LBB7_9 ; RV32I-NEXT: .LBB7_8: -; RV32I-NEXT: mv t4, t0 +; RV32I-NEXT: mv t4, t6 ; RV32I-NEXT: .LBB7_9: -; RV32I-NEXT: mv t0, t6 -; RV32I-NEXT: beqz a2, .LBB7_11 +; RV32I-NEXT: bnez 
a4, .LBB7_12 ; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv t0, t4 +; RV32I-NEXT: bltz a7, .LBB7_13 ; RV32I-NEXT: .LBB7_11: -; RV32I-NEXT: lw a1, 0(a1) -; RV32I-NEXT: xori t4, a2, 31 -; RV32I-NEXT: bltz a6, .LBB7_13 -; RV32I-NEXT: # %bb.12: -; RV32I-NEXT: srl t6, t6, a6 -; RV32I-NEXT: bltz t2, .LBB7_14 +; RV32I-NEXT: sll t6, t1, a7 +; RV32I-NEXT: bltz a7, .LBB7_14 ; RV32I-NEXT: j .LBB7_15 +; RV32I-NEXT: .LBB7_12: +; RV32I-NEXT: mv a2, t4 +; RV32I-NEXT: bgez a7, .LBB7_11 ; RV32I-NEXT: .LBB7_13: -; RV32I-NEXT: srl s0, a1, a2 -; RV32I-NEXT: slli t6, t6, 1 -; RV32I-NEXT: sll t6, t6, t4 -; RV32I-NEXT: or t6, s0, t6 -; RV32I-NEXT: bgez t2, .LBB7_15 +; RV32I-NEXT: sll t4, a1, a6 +; RV32I-NEXT: srli t6, t1, 1 +; RV32I-NEXT: srl t6, t6, t0 +; RV32I-NEXT: or t6, t4, t6 +; RV32I-NEXT: bgez a7, .LBB7_15 ; RV32I-NEXT: .LBB7_14: -; RV32I-NEXT: or t6, t6, t1 +; RV32I-NEXT: srl t4, a3, a6 +; RV32I-NEXT: or t6, t6, t4 ; RV32I-NEXT: .LBB7_15: -; RV32I-NEXT: slli t1, a4, 1 +; RV32I-NEXT: srai t4, a1, 31 ; RV32I-NEXT: bltz t5, .LBB7_17 ; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: sra t2, a4, t5 -; RV32I-NEXT: bgeu a2, a3, .LBB7_18 +; RV32I-NEXT: mv t3, t4 +; RV32I-NEXT: bgeu a4, a5, .LBB7_18 ; RV32I-NEXT: j .LBB7_19 ; RV32I-NEXT: .LBB7_17: -; RV32I-NEXT: srl t2, a5, t3 -; RV32I-NEXT: xori t3, t3, 31 -; RV32I-NEXT: sll t3, t1, t3 -; RV32I-NEXT: or t2, t2, t3 -; RV32I-NEXT: bltu a2, a3, .LBB7_19 +; RV32I-NEXT: sra t3, a1, t3 +; RV32I-NEXT: bltu a4, a5, .LBB7_19 ; RV32I-NEXT: .LBB7_18: -; RV32I-NEXT: mv t6, t2 +; RV32I-NEXT: mv t6, t3 ; RV32I-NEXT: .LBB7_19: -; RV32I-NEXT: bnez a2, .LBB7_22 +; RV32I-NEXT: bnez a4, .LBB7_22 ; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: bltz a6, .LBB7_23 +; RV32I-NEXT: bltz a7, .LBB7_23 ; RV32I-NEXT: .LBB7_21: -; RV32I-NEXT: sra a5, a4, a6 -; RV32I-NEXT: bgeu a2, a3, .LBB7_24 +; RV32I-NEXT: sra t0, a1, a7 +; RV32I-NEXT: bgeu a4, a5, .LBB7_24 ; RV32I-NEXT: j .LBB7_25 ; RV32I-NEXT: .LBB7_22: -; RV32I-NEXT: mv a1, t6 -; RV32I-NEXT: bgez a6, .LBB7_21 +; RV32I-NEXT: mv a3, t6 +; 
RV32I-NEXT: bgez a7, .LBB7_21 ; RV32I-NEXT: .LBB7_23: -; RV32I-NEXT: srl a5, a5, a2 -; RV32I-NEXT: sll t1, t1, t4 -; RV32I-NEXT: or a5, a5, t1 -; RV32I-NEXT: bltu a2, a3, .LBB7_25 +; RV32I-NEXT: srl t1, t1, a6 +; RV32I-NEXT: sll t0, t2, t0 +; RV32I-NEXT: or t0, t1, t0 +; RV32I-NEXT: bltu a4, a5, .LBB7_25 ; RV32I-NEXT: .LBB7_24: -; RV32I-NEXT: mv a5, a7 +; RV32I-NEXT: mv t0, t4 ; RV32I-NEXT: .LBB7_25: -; RV32I-NEXT: bltz a6, .LBB7_27 +; RV32I-NEXT: bltz a7, .LBB7_27 ; RV32I-NEXT: # %bb.26: -; RV32I-NEXT: mv a4, a7 -; RV32I-NEXT: bgeu a2, a3, .LBB7_28 +; RV32I-NEXT: mv a1, t4 +; RV32I-NEXT: bgeu a4, a5, .LBB7_28 ; RV32I-NEXT: j .LBB7_29 ; RV32I-NEXT: .LBB7_27: -; RV32I-NEXT: sra a4, a4, a2 -; RV32I-NEXT: bltu a2, a3, .LBB7_29 +; RV32I-NEXT: sra a1, a1, a6 +; RV32I-NEXT: bltu a4, a5, .LBB7_29 ; RV32I-NEXT: .LBB7_28: -; RV32I-NEXT: mv a4, a7 +; RV32I-NEXT: mv a1, t4 ; RV32I-NEXT: .LBB7_29: -; RV32I-NEXT: sw a4, 12(a0) -; RV32I-NEXT: sw a5, 8(a0) -; RV32I-NEXT: sw a1, 0(a0) -; RV32I-NEXT: sw t0, 4(a0) +; RV32I-NEXT: sw a1, 12(a0) +; RV32I-NEXT: sw t0, 8(a0) +; RV32I-NEXT: sw a3, 4(a0) +; RV32I-NEXT: sw a2, 0(a0) ; RV32I-NEXT: lw s0, 12(sp) # 4-byte Folded Reload ; RV32I-NEXT: addi sp, sp, 16 ; RV32I-NEXT: ret @@ -419,114 +419,115 @@ ; RV32I-LABEL: shl128: ; RV32I: # %bb.0: ; RV32I-NEXT: lw a2, 0(a2) -; RV32I-NEXT: lw a5, 4(a1) +; RV32I-NEXT: lw a7, 4(a1) ; RV32I-NEXT: lw a4, 0(a1) -; RV32I-NEXT: neg a6, a2 ; RV32I-NEXT: li a3, 64 -; RV32I-NEXT: li a7, 32 -; RV32I-NEXT: sub t1, a7, a2 -; RV32I-NEXT: srl t0, a5, a6 -; RV32I-NEXT: bltz t1, .LBB8_2 +; RV32I-NEXT: sub a5, a3, a2 +; RV32I-NEXT: andi t0, a5, 63 +; RV32I-NEXT: addi a6, t0, -32 +; RV32I-NEXT: xori t0, t0, 31 +; RV32I-NEXT: bltz a6, .LBB8_2 ; RV32I-NEXT: # %bb.1: -; RV32I-NEXT: mv t2, t0 -; RV32I-NEXT: j .LBB8_3 +; RV32I-NEXT: srl t6, a7, a6 +; RV32I-NEXT: lw t5, 8(a1) +; RV32I-NEXT: bltz a6, .LBB8_3 +; RV32I-NEXT: j .LBB8_4 ; RV32I-NEXT: .LBB8_2: -; RV32I-NEXT: srl a6, a4, a6 -; RV32I-NEXT: sub a7, a3, a2 -; 
RV32I-NEXT: xori a7, a7, 31 -; RV32I-NEXT: slli t2, a5, 1 -; RV32I-NEXT: sll a7, t2, a7 -; RV32I-NEXT: or t2, a6, a7 -; RV32I-NEXT: .LBB8_3: +; RV32I-NEXT: srl t1, a4, a5 +; RV32I-NEXT: slli t2, a7, 1 +; RV32I-NEXT: sll t2, t2, t0 +; RV32I-NEXT: or t6, t1, t2 ; RV32I-NEXT: lw t5, 8(a1) -; RV32I-NEXT: addi a6, a2, -32 -; RV32I-NEXT: bgez a6, .LBB8_5 -; RV32I-NEXT: # %bb.4: -; RV32I-NEXT: sll a7, t5, a2 -; RV32I-NEXT: or t2, t2, a7 -; RV32I-NEXT: .LBB8_5: -; RV32I-NEXT: addi t4, a2, -96 -; RV32I-NEXT: addi t3, a2, -64 -; RV32I-NEXT: bltz t4, .LBB8_7 -; RV32I-NEXT: # %bb.6: -; RV32I-NEXT: li a7, 0 -; RV32I-NEXT: bgeu a2, a3, .LBB8_8 -; RV32I-NEXT: j .LBB8_9 +; RV32I-NEXT: bgez a6, .LBB8_4 +; RV32I-NEXT: .LBB8_3: +; RV32I-NEXT: sll t1, t5, a5 +; RV32I-NEXT: or t6, t6, t1 +; RV32I-NEXT: .LBB8_4: +; RV32I-NEXT: addi t2, a2, -64 +; RV32I-NEXT: andi t3, t2, 63 +; RV32I-NEXT: addi t4, t3, -32 +; RV32I-NEXT: bltz t4, .LBB8_6 +; RV32I-NEXT: # %bb.5: +; RV32I-NEXT: li t1, 0 +; RV32I-NEXT: bgeu a2, a3, .LBB8_7 +; RV32I-NEXT: j .LBB8_8 +; RV32I-NEXT: .LBB8_6: +; RV32I-NEXT: sll t1, a4, t2 +; RV32I-NEXT: bltu a2, a3, .LBB8_8 ; RV32I-NEXT: .LBB8_7: -; RV32I-NEXT: sll a7, a4, t3 -; RV32I-NEXT: bltu a2, a3, .LBB8_9 +; RV32I-NEXT: mv t6, t1 ; RV32I-NEXT: .LBB8_8: -; RV32I-NEXT: mv t2, a7 -; RV32I-NEXT: .LBB8_9: -; RV32I-NEXT: mv a7, t5 -; RV32I-NEXT: beqz a2, .LBB8_11 -; RV32I-NEXT: # %bb.10: -; RV32I-NEXT: mv a7, t2 -; RV32I-NEXT: .LBB8_11: +; RV32I-NEXT: mv t1, t5 +; RV32I-NEXT: bnez a2, .LBB8_11 +; RV32I-NEXT: # %bb.9: ; RV32I-NEXT: lw a1, 12(a1) -; RV32I-NEXT: xori t2, a2, 31 -; RV32I-NEXT: bltz a6, .LBB8_13 -; RV32I-NEXT: # %bb.12: +; RV32I-NEXT: bltz a6, .LBB8_12 +; RV32I-NEXT: .LBB8_10: ; RV32I-NEXT: sll t5, t5, a6 -; RV32I-NEXT: bltz t1, .LBB8_14 -; RV32I-NEXT: j .LBB8_15 -; RV32I-NEXT: .LBB8_13: -; RV32I-NEXT: sll t6, a1, a2 +; RV32I-NEXT: bltz a6, .LBB8_13 +; RV32I-NEXT: j .LBB8_14 +; RV32I-NEXT: .LBB8_11: +; RV32I-NEXT: mv t1, t6 +; RV32I-NEXT: lw a1, 12(a1) +; RV32I-NEXT: 
bgez a6, .LBB8_10 +; RV32I-NEXT: .LBB8_12: +; RV32I-NEXT: sll t6, a1, a5 ; RV32I-NEXT: srli t5, t5, 1 -; RV32I-NEXT: srl t5, t5, t2 +; RV32I-NEXT: srl t5, t5, t0 ; RV32I-NEXT: or t5, t6, t5 -; RV32I-NEXT: bgez t1, .LBB8_15 +; RV32I-NEXT: bgez a6, .LBB8_14 +; RV32I-NEXT: .LBB8_13: +; RV32I-NEXT: srl t6, a7, a5 +; RV32I-NEXT: or t5, t5, t6 ; RV32I-NEXT: .LBB8_14: -; RV32I-NEXT: or t5, t5, t0 -; RV32I-NEXT: .LBB8_15: -; RV32I-NEXT: srli t0, a4, 1 -; RV32I-NEXT: bltz t4, .LBB8_17 -; RV32I-NEXT: # %bb.16: -; RV32I-NEXT: sll t1, a4, t4 -; RV32I-NEXT: bgeu a2, a3, .LBB8_18 -; RV32I-NEXT: j .LBB8_19 -; RV32I-NEXT: .LBB8_17: -; RV32I-NEXT: sll t1, a5, t3 +; RV32I-NEXT: srli t6, a4, 1 +; RV32I-NEXT: bltz t4, .LBB8_16 +; RV32I-NEXT: # %bb.15: +; RV32I-NEXT: sll t2, a4, t4 +; RV32I-NEXT: bgeu a2, a3, .LBB8_17 +; RV32I-NEXT: j .LBB8_18 +; RV32I-NEXT: .LBB8_16: +; RV32I-NEXT: sll t2, a7, t2 ; RV32I-NEXT: xori t3, t3, 31 -; RV32I-NEXT: srl t3, t0, t3 -; RV32I-NEXT: or t1, t1, t3 -; RV32I-NEXT: bltu a2, a3, .LBB8_19 +; RV32I-NEXT: srl t3, t6, t3 +; RV32I-NEXT: or t2, t2, t3 +; RV32I-NEXT: bltu a2, a3, .LBB8_18 +; RV32I-NEXT: .LBB8_17: +; RV32I-NEXT: mv t5, t2 ; RV32I-NEXT: .LBB8_18: -; RV32I-NEXT: mv t5, t1 -; RV32I-NEXT: .LBB8_19: -; RV32I-NEXT: bnez a2, .LBB8_22 -; RV32I-NEXT: # %bb.20: -; RV32I-NEXT: bltz a6, .LBB8_23 +; RV32I-NEXT: bnez a2, .LBB8_21 +; RV32I-NEXT: # %bb.19: +; RV32I-NEXT: bltz a6, .LBB8_22 +; RV32I-NEXT: .LBB8_20: +; RV32I-NEXT: sll a7, a4, a6 +; RV32I-NEXT: bgeu a2, a3, .LBB8_23 +; RV32I-NEXT: j .LBB8_24 ; RV32I-NEXT: .LBB8_21: -; RV32I-NEXT: sll a5, a4, a6 -; RV32I-NEXT: bgeu a2, a3, .LBB8_24 -; RV32I-NEXT: j .LBB8_25 -; RV32I-NEXT: .LBB8_22: ; RV32I-NEXT: mv a1, t5 -; RV32I-NEXT: bgez a6, .LBB8_21 +; RV32I-NEXT: bgez a6, .LBB8_20 +; RV32I-NEXT: .LBB8_22: +; RV32I-NEXT: sll a7, a7, a5 +; RV32I-NEXT: srl t0, t6, t0 +; RV32I-NEXT: or a7, a7, t0 +; RV32I-NEXT: bltu a2, a3, .LBB8_24 ; RV32I-NEXT: .LBB8_23: -; RV32I-NEXT: sll a5, a5, a2 -; RV32I-NEXT: srl t0, t0, 
t2 -; RV32I-NEXT: or a5, a5, t0 -; RV32I-NEXT: bltu a2, a3, .LBB8_25 +; RV32I-NEXT: li a7, 0 ; RV32I-NEXT: .LBB8_24: -; RV32I-NEXT: li a5, 0 -; RV32I-NEXT: .LBB8_25: -; RV32I-NEXT: bltz a6, .LBB8_27 -; RV32I-NEXT: # %bb.26: +; RV32I-NEXT: bltz a6, .LBB8_26 +; RV32I-NEXT: # %bb.25: ; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: bgeu a2, a3, .LBB8_28 -; RV32I-NEXT: j .LBB8_29 +; RV32I-NEXT: bgeu a2, a3, .LBB8_27 +; RV32I-NEXT: j .LBB8_28 +; RV32I-NEXT: .LBB8_26: +; RV32I-NEXT: sll a4, a4, a5 +; RV32I-NEXT: bltu a2, a3, .LBB8_28 ; RV32I-NEXT: .LBB8_27: -; RV32I-NEXT: sll a4, a4, a2 -; RV32I-NEXT: bltu a2, a3, .LBB8_29 -; RV32I-NEXT: .LBB8_28: ; RV32I-NEXT: li a4, 0 -; RV32I-NEXT: .LBB8_29: +; RV32I-NEXT: .LBB8_28: ; RV32I-NEXT: sw a4, 0(a0) -; RV32I-NEXT: sw a5, 4(a0) +; RV32I-NEXT: sw a7, 4(a0) ; RV32I-NEXT: sw a1, 12(a0) -; RV32I-NEXT: sw a7, 8(a0) +; RV32I-NEXT: sw t1, 8(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: shl128: diff --git a/llvm/test/CodeGen/SystemZ/shift-12.ll b/llvm/test/CodeGen/SystemZ/shift-12.ll --- a/llvm/test/CodeGen/SystemZ/shift-12.ll +++ b/llvm/test/CodeGen/SystemZ/shift-12.ll @@ -125,21 +125,21 @@ ; CHECK-NEXT: stmg %r14, %r15, 112(%r15) ; CHECK-NEXT: .cfi_offset %r14, -48 ; CHECK-NEXT: .cfi_offset %r15, -40 -; CHECK-NEXT: lg %r0, 8(%r3) -; CHECK-NEXT: lg %r1, 0(%r3) -; CHECK-NEXT: risblg %r3, %r4, 25, 159, 0 -; CHECK-NEXT: lcr %r14, %r3 -; CHECK-NEXT: sllg %r5, %r1, 0(%r4) -; CHECK-NEXT: srlg %r14, %r0, 0(%r14) +; CHECK-NEXT: lg %r0, 0(%r3) +; CHECK-NEXT: lg %r1, 8(%r3) +; CHECK-NEXT: lhi %r3, 64 +; CHECK-NEXT: sr %r3, %r4 +; CHECK-NEXT: srlg %r5, %r1, 0(%r3) +; CHECK-NEXT: sllg %r14, %r0, 0(%r3) ; CHECK-NEXT: ogr %r5, %r14 -; CHECK-NEXT: sllg %r3, %r0, -64(%r3) +; CHECK-NEXT: sllg %r14, %r1, -64(%r4) ; CHECK-NEXT: tmll %r4, 127 -; CHECK-NEXT: locgrle %r3, %r5 -; CHECK-NEXT: sllg %r0, %r0, 0(%r4) -; CHECK-NEXT: locgre %r3, %r1 +; CHECK-NEXT: locgrle %r14, %r5 +; CHECK-NEXT: locgre %r14, %r0 +; CHECK-NEXT: sllg %r0, %r1, 0(%r3) ; CHECK-NEXT: locghinle %r0, 
0 ; CHECK-NEXT: stg %r0, 8(%r2) -; CHECK-NEXT: stg %r3, 0(%r2) +; CHECK-NEXT: stg %r14, 0(%r2) ; CHECK-NEXT: lmg %r14, %r15, 112(%r15) ; CHECK-NEXT: br %r14 %and = and i32 %sh, 127 @@ -154,21 +154,21 @@ ; CHECK-NEXT: stmg %r14, %r15, 112(%r15) ; CHECK-NEXT: .cfi_offset %r14, -48 ; CHECK-NEXT: .cfi_offset %r15, -40 -; CHECK-NEXT: lg %r0, 0(%r3) -; CHECK-NEXT: lg %r1, 8(%r3) -; CHECK-NEXT: risblg %r3, %r4, 25, 159, 0 -; CHECK-NEXT: lcr %r14, %r3 -; CHECK-NEXT: srlg %r5, %r1, 0(%r4) -; CHECK-NEXT: sllg %r14, %r0, 0(%r14) +; CHECK-NEXT: lg %r0, 8(%r3) +; CHECK-NEXT: lg %r1, 0(%r3) +; CHECK-NEXT: lhi %r3, 64 +; CHECK-NEXT: sr %r3, %r4 +; CHECK-NEXT: sllg %r5, %r1, 0(%r3) +; CHECK-NEXT: srlg %r14, %r0, 0(%r3) ; CHECK-NEXT: ogr %r5, %r14 -; CHECK-NEXT: srlg %r3, %r0, -64(%r3) +; CHECK-NEXT: srlg %r14, %r1, -64(%r4) ; CHECK-NEXT: tmll %r4, 127 -; CHECK-NEXT: locgrle %r3, %r5 -; CHECK-NEXT: srlg %r0, %r0, 0(%r4) -; CHECK-NEXT: locgre %r3, %r1 +; CHECK-NEXT: locgrle %r14, %r5 +; CHECK-NEXT: locgre %r14, %r0 +; CHECK-NEXT: srlg %r0, %r1, 0(%r3) ; CHECK-NEXT: locghinle %r0, 0 ; CHECK-NEXT: stg %r0, 0(%r2) -; CHECK-NEXT: stg %r3, 8(%r2) +; CHECK-NEXT: stg %r14, 8(%r2) ; CHECK-NEXT: lmg %r14, %r15, 112(%r15) ; CHECK-NEXT: br %r14 %and = and i32 %sh, 127 @@ -183,22 +183,22 @@ ; CHECK-NEXT: stmg %r14, %r15, 112(%r15) ; CHECK-NEXT: .cfi_offset %r14, -48 ; CHECK-NEXT: .cfi_offset %r15, -40 -; CHECK-NEXT: lg %r0, 0(%r3) -; CHECK-NEXT: lg %r1, 8(%r3) -; CHECK-NEXT: risblg %r3, %r4, 25, 159, 0 -; CHECK-NEXT: lcr %r14, %r3 -; CHECK-NEXT: srlg %r5, %r1, 0(%r4) -; CHECK-NEXT: sllg %r14, %r0, 0(%r14) +; CHECK-NEXT: lg %r0, 8(%r3) +; CHECK-NEXT: lg %r1, 0(%r3) +; CHECK-NEXT: lhi %r3, 64 +; CHECK-NEXT: sr %r3, %r4 +; CHECK-NEXT: sllg %r5, %r1, 0(%r3) +; CHECK-NEXT: srlg %r14, %r0, 0(%r3) ; CHECK-NEXT: ogr %r5, %r14 -; CHECK-NEXT: srag %r14, %r0, 0(%r4) -; CHECK-NEXT: srag %r3, %r0, -64(%r3) -; CHECK-NEXT: srag %r0, %r0, 63 +; CHECK-NEXT: srag %r3, %r1, 0(%r3) +; CHECK-NEXT: srag %r14, %r1, 
-64(%r4) +; CHECK-NEXT: srag %r1, %r1, 63 ; CHECK-NEXT: tmll %r4, 127 -; CHECK-NEXT: locgrle %r3, %r5 -; CHECK-NEXT: locgre %r3, %r1 -; CHECK-NEXT: locgrle %r0, %r14 -; CHECK-NEXT: stg %r0, 0(%r2) -; CHECK-NEXT: stg %r3, 8(%r2) +; CHECK-NEXT: locgrle %r14, %r5 +; CHECK-NEXT: locgre %r14, %r0 +; CHECK-NEXT: locgrle %r1, %r3 +; CHECK-NEXT: stg %r1, 0(%r2) +; CHECK-NEXT: stg %r14, 8(%r2) ; CHECK-NEXT: lmg %r14, %r15, 112(%r15) ; CHECK-NEXT: br %r14 %and = and i32 %sh, 127 diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll --- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll +++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll @@ -15,41 +15,39 @@ ; ILP-NEXT: pushq %r14 ; ILP-NEXT: pushq %rbx ; ILP-NEXT: movq %rdi, %rax -; ILP-NEXT: xorl %r8d, %r8d ; ILP-NEXT: addl %esi, %esi -; ILP-NEXT: leal 3(%rsi), %r9d -; ILP-NEXT: movl $1, %r11d -; ILP-NEXT: xorl %r14d, %r14d -; ILP-NEXT: movl %r9d, %ecx -; ILP-NEXT: shldq %cl, %r11, %r14 +; ILP-NEXT: movb $125, %r10b +; ILP-NEXT: subb %sil, %r10b +; ILP-NEXT: xorl %r8d, %r8d +; ILP-NEXT: leal -125(%rsi), %r9d ; ILP-NEXT: movl $1, %edx -; ILP-NEXT: shlq %cl, %rdx -; ILP-NEXT: leal -125(%rsi), %r10d -; ILP-NEXT: xorl %ebx, %ebx -; ILP-NEXT: movl %r10d, %ecx -; ILP-NEXT: shldq %cl, %r11, %rbx -; ILP-NEXT: testb $64, %r9b -; ILP-NEXT: cmovneq %rdx, %r14 -; ILP-NEXT: cmovneq %r8, %rdx +; ILP-NEXT: xorl %r11d, %r11d +; ILP-NEXT: movl %r9d, %ecx +; ILP-NEXT: shldq %cl, %rdx, %r11 ; ILP-NEXT: movl $1, %edi ; ILP-NEXT: shlq %cl, %rdi -; ILP-NEXT: movb $125, %cl -; ILP-NEXT: subb %sil, %cl -; ILP-NEXT: shrdq %cl, %r8, %r11 -; ILP-NEXT: testb $64, %cl -; ILP-NEXT: cmovneq %r8, %r11 +; ILP-NEXT: movl $1, %r14d +; ILP-NEXT: movl %r10d, %ecx +; ILP-NEXT: shrdq %cl, %r8, %r14 +; ILP-NEXT: xorl %ebx, %ebx +; ILP-NEXT: shldq %cl, %rdx, %rbx +; ILP-NEXT: shlq %cl, %rdx ; ILP-NEXT: testb $64, %r10b -; ILP-NEXT: cmovneq %rdi, %rbx +; ILP-NEXT: cmovneq %rdx, %rbx +; ILP-NEXT: cmovneq %r8, 
%rdx +; ILP-NEXT: cmovneq %r8, %r14 +; ILP-NEXT: testb $64, %r9b +; ILP-NEXT: cmovneq %rdi, %r11 ; ILP-NEXT: cmovneq %r8, %rdi -; ILP-NEXT: testb %r9b, %r9b -; ILP-NEXT: cmovsq %r8, %r14 +; ILP-NEXT: addb $3, %sil +; ILP-NEXT: cmovsq %r8, %rbx ; ILP-NEXT: cmovsq %r8, %rdx -; ILP-NEXT: movq %r14, 8(%rax) +; ILP-NEXT: movq %rbx, 8(%rax) ; ILP-NEXT: movq %rdx, (%rax) -; ILP-NEXT: cmovnsq %r8, %rbx -; ILP-NEXT: cmoveq %r8, %rbx -; ILP-NEXT: movq %rbx, 24(%rax) -; ILP-NEXT: cmovnsq %r11, %rdi +; ILP-NEXT: cmovnsq %r8, %r11 +; ILP-NEXT: cmoveq %r8, %r11 +; ILP-NEXT: movq %r11, 24(%rax) +; ILP-NEXT: cmovnsq %r14, %rdi ; ILP-NEXT: cmoveq %r8, %rdi ; ILP-NEXT: movq %rdi, 16(%rax) ; ILP-NEXT: popq %rbx @@ -58,7 +56,6 @@ ; ; HYBRID-LABEL: test1: ; HYBRID: # %bb.0: -; HYBRID-NEXT: pushq %rbx ; HYBRID-NEXT: movq %rdi, %rax ; HYBRID-NEXT: addl %esi, %esi ; HYBRID-NEXT: movb $125, %cl @@ -67,43 +64,36 @@ ; HYBRID-NEXT: movl $1, %edi ; HYBRID-NEXT: movl $1, %r9d ; HYBRID-NEXT: shrdq %cl, %r8, %r9 +; HYBRID-NEXT: xorl %r10d, %r10d +; HYBRID-NEXT: shldq %cl, %rdi, %r10 +; HYBRID-NEXT: movl $1, %r11d +; HYBRID-NEXT: shlq %cl, %r11 ; HYBRID-NEXT: testb $64, %cl +; HYBRID-NEXT: cmovneq %r11, %r10 ; HYBRID-NEXT: cmovneq %r8, %r9 -; HYBRID-NEXT: leal 3(%rsi), %r10d -; HYBRID-NEXT: xorl %r11d, %r11d -; HYBRID-NEXT: movl %r10d, %ecx -; HYBRID-NEXT: shldq %cl, %rdi, %r11 -; HYBRID-NEXT: addb $-125, %sil -; HYBRID-NEXT: xorl %ebx, %ebx -; HYBRID-NEXT: movl %esi, %ecx -; HYBRID-NEXT: shldq %cl, %rdi, %rbx -; HYBRID-NEXT: movl $1, %edx -; HYBRID-NEXT: shlq %cl, %rdx -; HYBRID-NEXT: testb $64, %sil -; HYBRID-NEXT: cmovneq %rdx, %rbx -; HYBRID-NEXT: cmovneq %r8, %rdx -; HYBRID-NEXT: movl %r10d, %ecx +; HYBRID-NEXT: cmovneq %r8, %r11 +; HYBRID-NEXT: leal -125(%rsi), %ecx +; HYBRID-NEXT: xorl %edx, %edx +; HYBRID-NEXT: shldq %cl, %rdi, %rdx ; HYBRID-NEXT: shlq %cl, %rdi -; HYBRID-NEXT: testb $64, %r10b -; HYBRID-NEXT: cmovneq %rdi, %r11 +; HYBRID-NEXT: testb $64, %cl +; HYBRID-NEXT: cmovneq %rdi, 
%rdx ; HYBRID-NEXT: cmovneq %r8, %rdi -; HYBRID-NEXT: testb %r10b, %r10b +; HYBRID-NEXT: addb $3, %sil +; HYBRID-NEXT: cmovsq %r8, %r10 +; HYBRID-NEXT: movq %r10, 8(%rax) ; HYBRID-NEXT: cmovsq %r8, %r11 -; HYBRID-NEXT: movq %r11, 8(%rax) -; HYBRID-NEXT: cmovsq %r8, %rdi -; HYBRID-NEXT: movq %rdi, (%rax) -; HYBRID-NEXT: cmovnsq %r8, %rbx -; HYBRID-NEXT: cmoveq %r8, %rbx -; HYBRID-NEXT: movq %rbx, 24(%rax) -; HYBRID-NEXT: cmovnsq %r9, %rdx +; HYBRID-NEXT: movq %r11, (%rax) +; HYBRID-NEXT: cmovnsq %r8, %rdx ; HYBRID-NEXT: cmoveq %r8, %rdx -; HYBRID-NEXT: movq %rdx, 16(%rax) -; HYBRID-NEXT: popq %rbx +; HYBRID-NEXT: movq %rdx, 24(%rax) +; HYBRID-NEXT: cmovnsq %r9, %rdi +; HYBRID-NEXT: cmoveq %r8, %rdi +; HYBRID-NEXT: movq %rdi, 16(%rax) ; HYBRID-NEXT: retq ; ; BURR-LABEL: test1: ; BURR: # %bb.0: -; BURR-NEXT: pushq %rbx ; BURR-NEXT: movq %rdi, %rax ; BURR-NEXT: addl %esi, %esi ; BURR-NEXT: movb $125, %cl @@ -112,129 +102,117 @@ ; BURR-NEXT: movl $1, %edi ; BURR-NEXT: movl $1, %r9d ; BURR-NEXT: shrdq %cl, %r8, %r9 +; BURR-NEXT: xorl %r10d, %r10d +; BURR-NEXT: shldq %cl, %rdi, %r10 +; BURR-NEXT: movl $1, %r11d +; BURR-NEXT: shlq %cl, %r11 ; BURR-NEXT: testb $64, %cl +; BURR-NEXT: cmovneq %r11, %r10 ; BURR-NEXT: cmovneq %r8, %r9 -; BURR-NEXT: leal 3(%rsi), %r10d -; BURR-NEXT: xorl %r11d, %r11d -; BURR-NEXT: movl %r10d, %ecx -; BURR-NEXT: shldq %cl, %rdi, %r11 -; BURR-NEXT: addb $-125, %sil -; BURR-NEXT: xorl %ebx, %ebx -; BURR-NEXT: movl %esi, %ecx -; BURR-NEXT: shldq %cl, %rdi, %rbx -; BURR-NEXT: movl $1, %edx -; BURR-NEXT: shlq %cl, %rdx -; BURR-NEXT: testb $64, %sil -; BURR-NEXT: cmovneq %rdx, %rbx -; BURR-NEXT: cmovneq %r8, %rdx -; BURR-NEXT: movl %r10d, %ecx +; BURR-NEXT: cmovneq %r8, %r11 +; BURR-NEXT: leal -125(%rsi), %ecx +; BURR-NEXT: xorl %edx, %edx +; BURR-NEXT: shldq %cl, %rdi, %rdx ; BURR-NEXT: shlq %cl, %rdi -; BURR-NEXT: testb $64, %r10b -; BURR-NEXT: cmovneq %rdi, %r11 +; BURR-NEXT: testb $64, %cl +; BURR-NEXT: cmovneq %rdi, %rdx ; BURR-NEXT: cmovneq %r8, 
%rdi -; BURR-NEXT: testb %r10b, %r10b +; BURR-NEXT: addb $3, %sil +; BURR-NEXT: cmovsq %r8, %r10 +; BURR-NEXT: movq %r10, 8(%rax) ; BURR-NEXT: cmovsq %r8, %r11 -; BURR-NEXT: movq %r11, 8(%rax) -; BURR-NEXT: cmovsq %r8, %rdi -; BURR-NEXT: movq %rdi, (%rax) -; BURR-NEXT: cmovnsq %r8, %rbx -; BURR-NEXT: cmoveq %r8, %rbx -; BURR-NEXT: movq %rbx, 24(%rax) -; BURR-NEXT: cmovnsq %r9, %rdx +; BURR-NEXT: movq %r11, (%rax) +; BURR-NEXT: cmovnsq %r8, %rdx ; BURR-NEXT: cmoveq %r8, %rdx -; BURR-NEXT: movq %rdx, 16(%rax) -; BURR-NEXT: popq %rbx +; BURR-NEXT: movq %rdx, 24(%rax) +; BURR-NEXT: cmovnsq %r9, %rdi +; BURR-NEXT: cmoveq %r8, %rdi +; BURR-NEXT: movq %rdi, 16(%rax) ; BURR-NEXT: retq ; ; SRC-LABEL: test1: ; SRC: # %bb.0: -; SRC-NEXT: pushq %rbx ; SRC-NEXT: movq %rdi, %rax ; SRC-NEXT: addl %esi, %esi -; SRC-NEXT: leal 3(%rsi), %r9d ; SRC-NEXT: movb $125, %cl ; SRC-NEXT: subb %sil, %cl ; SRC-NEXT: xorl %r8d, %r8d -; SRC-NEXT: movl $1, %edi +; SRC-NEXT: movl $1, %edx ; SRC-NEXT: movl $1, %r10d ; SRC-NEXT: shrdq %cl, %r8, %r10 +; SRC-NEXT: xorl %r9d, %r9d +; SRC-NEXT: shldq %cl, %rdx, %r9 +; SRC-NEXT: movl $1, %r11d +; SRC-NEXT: shlq %cl, %r11 ; SRC-NEXT: testb $64, %cl +; SRC-NEXT: cmovneq %r11, %r9 +; SRC-NEXT: cmovneq %r8, %r11 ; SRC-NEXT: cmovneq %r8, %r10 -; SRC-NEXT: addb $-125, %sil -; SRC-NEXT: xorl %edx, %edx -; SRC-NEXT: movl %esi, %ecx -; SRC-NEXT: shldq %cl, %rdi, %rdx -; SRC-NEXT: xorl %r11d, %r11d -; SRC-NEXT: movl %r9d, %ecx -; SRC-NEXT: shldq %cl, %rdi, %r11 -; SRC-NEXT: movl $1, %ebx -; SRC-NEXT: shlq %cl, %rbx -; SRC-NEXT: testb $64, %r9b -; SRC-NEXT: cmovneq %rbx, %r11 -; SRC-NEXT: cmovneq %r8, %rbx -; SRC-NEXT: movl %esi, %ecx -; SRC-NEXT: shlq %cl, %rdi -; SRC-NEXT: testb $64, %sil -; SRC-NEXT: cmovneq %rdi, %rdx -; SRC-NEXT: cmovneq %r8, %rdi -; SRC-NEXT: testb %r9b, %r9b -; SRC-NEXT: cmovnsq %r10, %rdi -; SRC-NEXT: cmoveq %r8, %rdi -; SRC-NEXT: cmovnsq %r8, %rdx +; SRC-NEXT: leal -125(%rsi), %ecx +; SRC-NEXT: xorl %edi, %edi +; SRC-NEXT: shldq %cl, 
%rdx, %rdi +; SRC-NEXT: shlq %cl, %rdx +; SRC-NEXT: testb $64, %cl +; SRC-NEXT: cmovneq %rdx, %rdi +; SRC-NEXT: cmovneq %r8, %rdx +; SRC-NEXT: addb $3, %sil +; SRC-NEXT: cmovsq %r8, %r9 +; SRC-NEXT: cmovnsq %r10, %rdx ; SRC-NEXT: cmoveq %r8, %rdx +; SRC-NEXT: cmovnsq %r8, %rdi +; SRC-NEXT: cmoveq %r8, %rdi ; SRC-NEXT: cmovsq %r8, %r11 -; SRC-NEXT: cmovsq %r8, %rbx -; SRC-NEXT: movq %r11, 8(%rax) -; SRC-NEXT: movq %rbx, (%rax) -; SRC-NEXT: movq %rdx, 24(%rax) -; SRC-NEXT: movq %rdi, 16(%rax) -; SRC-NEXT: popq %rbx +; SRC-NEXT: movq %r9, 8(%rax) +; SRC-NEXT: movq %r11, (%rax) +; SRC-NEXT: movq %rdi, 24(%rax) +; SRC-NEXT: movq %rdx, 16(%rax) ; SRC-NEXT: retq ; ; LIN-LABEL: test1: ; LIN: # %bb.0: ; LIN-NEXT: movq %rdi, %rax -; LIN-NEXT: xorl %r9d, %r9d -; LIN-NEXT: movl $1, %r8d -; LIN-NEXT: addl %esi, %esi -; LIN-NEXT: leal 3(%rsi), %ecx -; LIN-NEXT: movl $1, %edi -; LIN-NEXT: shlq %cl, %rdi -; LIN-NEXT: testb $64, %cl -; LIN-NEXT: movq %rdi, %rdx -; LIN-NEXT: cmovneq %r9, %rdx -; LIN-NEXT: testb %cl, %cl -; LIN-NEXT: cmovsq %r9, %rdx -; LIN-NEXT: movq %rdx, (%rax) +; LIN-NEXT: xorl %r8d, %r8d +; LIN-NEXT: movl $1, %r9d +; LIN-NEXT: movb $125, %r10b +; LIN-NEXT: leal (%rsi,%rsi), %edi +; LIN-NEXT: subb %dil, %r10b +; LIN-NEXT: movl $1, %esi +; LIN-NEXT: movl %r10d, %ecx +; LIN-NEXT: shlq %cl, %rsi +; LIN-NEXT: testb $64, %r10b +; LIN-NEXT: movq %rsi, %rcx +; LIN-NEXT: cmovneq %r8, %rcx +; LIN-NEXT: movl %edi, %edx +; LIN-NEXT: addb $3, %dl +; LIN-NEXT: cmovsq %r8, %rcx +; LIN-NEXT: movq %rcx, (%rax) ; LIN-NEXT: xorl %edx, %edx -; LIN-NEXT: # kill: def $cl killed $cl killed $ecx -; LIN-NEXT: shldq %cl, %r8, %rdx -; LIN-NEXT: cmovneq %rdi, %rdx -; LIN-NEXT: cmovsq %r9, %rdx +; LIN-NEXT: movl %r10d, %ecx +; LIN-NEXT: shldq %cl, %r9, %rdx +; LIN-NEXT: cmovneq %rsi, %rdx +; LIN-NEXT: cmovsq %r8, %rdx ; LIN-NEXT: movq %rdx, 8(%rax) -; LIN-NEXT: leal -125(%rsi), %r10d +; LIN-NEXT: addb $-125, %dil +; LIN-NEXT: movl $1, %r11d +; LIN-NEXT: movl %edi, %ecx +; LIN-NEXT: shlq 
%cl, %r11 +; LIN-NEXT: testb $64, %dil +; LIN-NEXT: movq %r11, %rsi +; LIN-NEXT: cmovneq %r8, %rsi ; LIN-NEXT: movl $1, %edx ; LIN-NEXT: movl %r10d, %ecx -; LIN-NEXT: shlq %cl, %rdx -; LIN-NEXT: testb $64, %r10b -; LIN-NEXT: movq %rdx, %rdi -; LIN-NEXT: cmovneq %r9, %rdi -; LIN-NEXT: movb $125, %cl -; LIN-NEXT: subb %sil, %cl -; LIN-NEXT: movl $1, %esi -; LIN-NEXT: shrdq %cl, %r9, %rsi -; LIN-NEXT: testb $64, %cl -; LIN-NEXT: cmovneq %r9, %rsi -; LIN-NEXT: cmovsq %rdi, %rsi -; LIN-NEXT: cmoveq %r9, %rsi -; LIN-NEXT: movq %rsi, 16(%rax) -; LIN-NEXT: xorl %esi, %esi -; LIN-NEXT: movl %r10d, %ecx -; LIN-NEXT: shldq %cl, %r8, %rsi -; LIN-NEXT: cmovneq %rdx, %rsi -; LIN-NEXT: cmovnsq %r9, %rsi -; LIN-NEXT: cmoveq %r9, %rsi -; LIN-NEXT: movq %rsi, 24(%rax) +; LIN-NEXT: shrdq %cl, %r8, %rdx +; LIN-NEXT: cmovneq %r8, %rdx +; LIN-NEXT: cmovsq %rsi, %rdx +; LIN-NEXT: cmoveq %r8, %rdx +; LIN-NEXT: movq %rdx, 16(%rax) +; LIN-NEXT: xorl %edx, %edx +; LIN-NEXT: movl %edi, %ecx +; LIN-NEXT: shldq %cl, %r9, %rdx +; LIN-NEXT: cmovneq %r11, %rdx +; LIN-NEXT: cmovnsq %r8, %rdx +; LIN-NEXT: cmoveq %r8, %rdx +; LIN-NEXT: movq %rdx, 24(%rax) ; LIN-NEXT: retq %b = add i256 %a, 1 %m = shl i256 %b, 1 diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll --- a/llvm/test/CodeGen/X86/shift-i128.ll +++ b/llvm/test/CodeGen/X86/shift-i128.ll @@ -13,112 +13,124 @@ ; i686-NEXT: pushl %ebx ; i686-NEXT: pushl %edi ; i686-NEXT: pushl %esi -; i686-NEXT: subl $20, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; i686-NEXT: subl $28, %esp +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi ; i686-NEXT: movl {{[0-9]+}}(%esp), %edi ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebp, %esi +; i686-NEXT: movb {{[0-9]+}}(%esp), %ah +; i686-NEXT: movb %ah, %al +; i686-NEXT: subb $64, %al +; i686-NEXT: negb %al +; i686-NEXT: movl %ebp, %edx ; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shrdl %cl, %edi, %esi -; i686-NEXT: shrl %cl, 
%edx -; i686-NEXT: shrl %cl, %edi +; i686-NEXT: shldl %cl, %edi, %edx +; i686-NEXT: shrl %cl, %ebp +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB0_1 -; i686-NEXT: # %bb.2: # %entry -; i686-NEXT: movl %edx, (%esp) # 4-byte Spill -; i686-NEXT: jmp .LBB0_3 -; i686-NEXT: .LBB0_1: -; i686-NEXT: movl %edi, %esi -; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; i686-NEXT: xorl %edi, %edi -; i686-NEXT: .LBB0_3: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, %edx -; i686-NEXT: subb $64, %dl -; i686-NEXT: jb .LBB0_5 -; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: xorl %edi, %edi -; i686-NEXT: .LBB0_5: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: negb %dl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl $0, %esi +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB0_2 +; i686-NEXT: # %bb.1: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: .LBB0_2: # %entry +; i686-NEXT: movl %esi, (%esp) # 4-byte Spill +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, %ebx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shll %cl, %ebx +; i686-NEXT: testb $32, %al +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: jne .LBB0_4 +; i686-NEXT: # %bb.3: # %entry ; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shldl %cl, %ebp, %edi -; i686-NEXT: movl %ebp, %esi -; i686-NEXT: shll %cl, %esi +; i686-NEXT: .LBB0_4: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, %edx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shrdl %cl, 
%esi, %edx +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB0_6 +; i686-NEXT: # %bb.5: # %entry +; i686-NEXT: movl %edx, %ebp +; i686-NEXT: .LBB0_6: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: cmpb $64, %ah +; i686-NEXT: jb .LBB0_8 +; i686-NEXT: # %bb.7: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB0_8: # %entry +; i686-NEXT: movb %ah, %dl +; i686-NEXT: addb $-64, %dl +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shrl %cl, %ebp ; i686-NEXT: testb $32, %dl -; i686-NEXT: movl %esi, %ebx -; i686-NEXT: jne .LBB0_7 -; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl %edi, %ebx -; i686-NEXT: .LBB0_7: # %entry -; i686-NEXT: movb %al, %ah -; i686-NEXT: addb $-64, %ah -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movb %ah, %cl -; i686-NEXT: shrl %cl, %edi -; i686-NEXT: testb $32, %ah ; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB0_9 -; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %edi, %ecx -; i686-NEXT: .LBB0_9: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: jb .LBB0_10 -; i686-NEXT: # %bb.11: # %entry +; i686-NEXT: jne .LBB0_10 +; i686-NEXT: # %bb.9: # %entry +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: .LBB0_10: # %entry +; i686-NEXT: cmpb $64, %ah +; i686-NEXT: jb .LBB0_11 +; i686-NEXT: # %bb.12: # %entry ; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: jmp .LBB0_12 -; i686-NEXT: .LBB0_10: +; i686-NEXT: jmp .LBB0_13 +; i686-NEXT: .LBB0_11: ; i686-NEXT: movl (%esp), %ecx # 4-byte Reload -; i686-NEXT: orl %ebx, %ecx -; i686-NEXT: .LBB0_12: # %entry +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: .LBB0_13: # %entry ; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB0_14 -; i686-NEXT: # %bb.13: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Spill -; i686-NEXT: .LBB0_14: # %entry -; i686-NEXT: movl %ebx, %edx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB0_15 +; i686-NEXT: # %bb.14: # %entry +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB0_15: # %entry +; i686-NEXT: movl %ecx, %ebx ; i686-NEXT: movl %eax, %ecx ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrdl %cl, %esi, %edx +; i686-NEXT: shrdl %cl, %esi, %ebx ; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB0_16 -; i686-NEXT: # %bb.15: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB0_16: # %entry -; i686-NEXT: movb %ah, %cl +; i686-NEXT: jne .LBB0_17 +; i686-NEXT: # %bb.16: # %entry +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB0_17: # %entry +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shrdl %cl, %esi, %edi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB0_19 +; i686-NEXT: # %bb.18: # %entry +; i686-NEXT: movl %edi, %ebp +; i686-NEXT: .LBB0_19: # %entry +; i686-NEXT: cmpb $64, %ah ; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: shrdl %cl, %edx, %ebp -; i686-NEXT: testb $32, %ah -; i686-NEXT: jne .LBB0_18 -; i686-NEXT: # %bb.17: # %entry -; i686-NEXT: movl %ebp, %edi -; i686-NEXT: .LBB0_18: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: jae .LBB0_20 -; i686-NEXT: # %bb.19: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; i686-NEXT: .LBB0_20: # %entry +; i686-NEXT: jae .LBB0_21 +; i686-NEXT: # %bb.20: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; i686-NEXT: .LBB0_21: # %entry ; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB0_22 -; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: movl %edi, %ebx -; 
i686-NEXT: movl (%esp), %esi # 4-byte Reload -; i686-NEXT: .LBB0_22: # %entry +; i686-NEXT: testb %ah, %ah +; i686-NEXT: je .LBB0_23 +; i686-NEXT: # %bb.22: # %entry +; i686-NEXT: movl %ebp, %edx +; i686-NEXT: movl (%esp), %eax # 4-byte Reload +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB0_23: # %entry ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; i686-NEXT: movl %eax, 12(%ecx) ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; i686-NEXT: movl %eax, 8(%ecx) -; i686-NEXT: movl %esi, 4(%ecx) -; i686-NEXT: movl %ebx, (%ecx) -; i686-NEXT: addl $20, %esp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 4(%ecx) +; i686-NEXT: movl %edx, (%ecx) +; i686-NEXT: addl $28, %esp ; i686-NEXT: popl %esi ; i686-NEXT: popl %edi ; i686-NEXT: popl %ebx @@ -150,116 +162,128 @@ ; i686-NEXT: pushl %ebx ; i686-NEXT: pushl %edi ; i686-NEXT: pushl %esi -; i686-NEXT: subl $24, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; i686-NEXT: subl $28, %esp +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebp, %esi +; i686-NEXT: movb {{[0-9]+}}(%esp), %ah +; i686-NEXT: movb %ah, %al +; i686-NEXT: subb $64, %al +; i686-NEXT: negb %al +; i686-NEXT: movl %ebx, %edx ; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shrdl %cl, %ebx, %esi -; i686-NEXT: shrl %cl, %edx -; i686-NEXT: movl %ebx, %edi -; i686-NEXT: sarl %cl, %edi -; i686-NEXT: sarl $31, %ebx +; i686-NEXT: shldl %cl, %edi, %edx +; i686-NEXT: movl %ebx, %ebp +; i686-NEXT: sarl %cl, %ebp +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %esi +; i686-NEXT: sarl $31, %esi ; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte 
Folded Spill ; i686-NEXT: testb $32, %al -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jne .LBB1_1 -; i686-NEXT: # %bb.2: # %entry -; i686-NEXT: movl %edx, (%esp) # 4-byte Spill -; i686-NEXT: jmp .LBB1_3 -; i686-NEXT: .LBB1_1: -; i686-NEXT: movl %edi, %esi -; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; i686-NEXT: movl %ebx, %edi -; i686-NEXT: .LBB1_3: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, %edx -; i686-NEXT: subb $64, %dl -; i686-NEXT: jb .LBB1_5 -; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: movl %ebx, %edi -; i686-NEXT: .LBB1_5: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: negb %dl +; i686-NEXT: movl $0, %ebx +; i686-NEXT: movl %esi, %ecx +; i686-NEXT: jne .LBB1_2 +; i686-NEXT: # %bb.1: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: .LBB1_2: # %entry +; i686-NEXT: movl %ebx, (%esp) # 4-byte Spill +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, %ebx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shll %cl, %ebx +; i686-NEXT: testb $32, %al +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: jne .LBB1_4 +; i686-NEXT: # %bb.3: # %entry +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: .LBB1_4: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, %edx +; i686-NEXT: movl %eax, %ecx ; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: shrdl %cl, %edi, %edx +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB1_6 +; i686-NEXT: # %bb.5: # %entry +; i686-NEXT: movl %edx, %ebp +; i686-NEXT: .LBB1_6: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: cmpb $64, %ah +; i686-NEXT: jb .LBB1_8 +; i686-NEXT: # %bb.7: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB1_8: # %entry +; i686-NEXT: movb %ah, %dl +; i686-NEXT: 
addb $-64, %dl +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp ; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shldl %cl, %ebp, %edi -; i686-NEXT: movl %ebp, %esi -; i686-NEXT: shll %cl, %esi +; i686-NEXT: sarl %cl, %ebp ; i686-NEXT: testb $32, %dl ; i686-NEXT: movl %esi, %ecx -; i686-NEXT: jne .LBB1_7 -; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl %edi, %ecx -; i686-NEXT: .LBB1_7: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movb %al, %ah -; i686-NEXT: addb $-64, %ah ; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movb %ah, %cl -; i686-NEXT: sarl %cl, %edi -; i686-NEXT: testb $32, %ah -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: jne .LBB1_9 -; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %edi, %ecx -; i686-NEXT: .LBB1_9: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: jb .LBB1_10 -; i686-NEXT: # %bb.11: # %entry -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB1_12 -; i686-NEXT: .LBB1_10: +; i686-NEXT: jne .LBB1_10 +; i686-NEXT: # %bb.9: # %entry +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: .LBB1_10: # %entry +; i686-NEXT: cmpb $64, %ah +; i686-NEXT: jb .LBB1_11 +; i686-NEXT: # %bb.12: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jmp .LBB1_13 +; i686-NEXT: .LBB1_11: ; i686-NEXT: movl (%esp), %ecx # 4-byte Reload ; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; i686-NEXT: .LBB1_12: # %entry +; i686-NEXT: .LBB1_13: # %entry ; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB1_14 -; i686-NEXT: # %bb.13: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB1_14: # %entry -; i686-NEXT: movl %ebx, %edx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB1_15 +; i686-NEXT: # %bb.14: # %entry +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill 
+; i686-NEXT: .LBB1_15: # %entry +; i686-NEXT: movl %ecx, %ebx ; i686-NEXT: movl %eax, %ecx ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrdl %cl, %esi, %edx +; i686-NEXT: shrdl %cl, %esi, %ebx ; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB1_16 -; i686-NEXT: # %bb.15: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB1_16: # %entry -; i686-NEXT: movb %ah, %cl +; i686-NEXT: jne .LBB1_17 +; i686-NEXT: # %bb.16: # %entry +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB1_17: # %entry +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shrdl %cl, %esi, %edi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB1_19 +; i686-NEXT: # %bb.18: # %entry +; i686-NEXT: movl %edi, %ebp +; i686-NEXT: .LBB1_19: # %entry +; i686-NEXT: cmpb $64, %ah ; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: shrdl %cl, %edx, %ebp -; i686-NEXT: testb $32, %ah -; i686-NEXT: jne .LBB1_18 -; i686-NEXT: # %bb.17: # %entry -; i686-NEXT: movl %ebp, %edi -; i686-NEXT: .LBB1_18: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: jae .LBB1_20 -; i686-NEXT: # %bb.19: -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; i686-NEXT: movl %ecx, %edi -; i686-NEXT: .LBB1_20: # %entry +; i686-NEXT: jae .LBB1_21 +; i686-NEXT: # %bb.20: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; i686-NEXT: .LBB1_21: # %entry ; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB1_22 -; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: movl %edi, %ebx -; i686-NEXT: movl (%esp), %esi # 4-byte Reload -; i686-NEXT: .LBB1_22: # %entry +; i686-NEXT: testb %ah, %ah +; i686-NEXT: je .LBB1_23 +; i686-NEXT: # %bb.22: # %entry +; i686-NEXT: movl %ebp, %edx +; i686-NEXT: movl (%esp), %eax # 4-byte 
Reload +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB1_23: # %entry ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; i686-NEXT: movl %eax, 12(%ecx) ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; i686-NEXT: movl %eax, 8(%ecx) -; i686-NEXT: movl %esi, 4(%ecx) -; i686-NEXT: movl %ebx, (%ecx) -; i686-NEXT: addl $24, %esp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 4(%ecx) +; i686-NEXT: movl %edx, (%ecx) +; i686-NEXT: addl $28, %esp ; i686-NEXT: popl %esi ; i686-NEXT: popl %edi ; i686-NEXT: popl %ebx @@ -292,113 +316,125 @@ ; i686-NEXT: pushl %ebx ; i686-NEXT: pushl %edi ; i686-NEXT: pushl %esi -; i686-NEXT: subl $20, %esp +; i686-NEXT: subl $28, %esp +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp ; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movb {{[0-9]+}}(%esp), %ah +; i686-NEXT: movb %ah, %al +; i686-NEXT: subb $64, %al +; i686-NEXT: negb %al ; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shll %cl, %ebx -; i686-NEXT: movl %ebp, %esi -; i686-NEXT: shll %cl, %esi -; i686-NEXT: movl %edi, %edx -; i686-NEXT: shldl %cl, %ebp, %edx +; i686-NEXT: shll %cl, %ebp +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: shll %cl, %edx ; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB2_1 -; i686-NEXT: # %bb.2: # %entry +; i686-NEXT: movl $0, %esi +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB2_2 +; i686-NEXT: # %bb.1: # %entry +; i686-NEXT: movl %edx, %esi +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: .LBB2_2: # %entry +; i686-NEXT: movl %esi, (%esp) # 4-byte Spill ; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, (%esp) # 4-byte Spill -; i686-NEXT: jmp .LBB2_3 -; i686-NEXT: .LBB2_1: -; i686-NEXT: movl 
%esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl $0, (%esp) # 4-byte Folded Spill -; i686-NEXT: xorl %esi, %esi -; i686-NEXT: .LBB2_3: # %entry -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, %edx -; i686-NEXT: subb $64, %dl -; i686-NEXT: jb .LBB2_5 -; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: xorl %esi, %esi -; i686-NEXT: .LBB2_5: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: negb %dl -; i686-NEXT: movl %edi, %esi +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, %ebx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrl %cl, %ebx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: shrdl %cl, %edi, %edx +; i686-NEXT: testb $32, %al +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: jne .LBB2_4 +; i686-NEXT: # %bb.3: # %entry ; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shrl %cl, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shrdl %cl, %edi, %ebx +; i686-NEXT: .LBB2_4: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, %edx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shldl %cl, %esi, %edx +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB2_6 +; i686-NEXT: # %bb.5: # %entry +; i686-NEXT: movl %edx, %ebp +; i686-NEXT: .LBB2_6: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: cmpb $64, %ah +; i686-NEXT: jb .LBB2_8 +; i686-NEXT: # %bb.7: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB2_8: # %entry +; i686-NEXT: movb %ah, %dl +; i686-NEXT: addb $-64, %dl +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shll %cl, %ebp ; i686-NEXT: testb $32, %dl -; i686-NEXT: movl %esi, %ebp -; i686-NEXT: jne .LBB2_7 -; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl %ebx, %ebp -; i686-NEXT: .LBB2_7: # %entry -; i686-NEXT: movb 
%al, %ah -; i686-NEXT: addb $-64, %ah -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movb %ah, %cl -; i686-NEXT: shll %cl, %ebx -; i686-NEXT: testb $32, %ah ; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB2_9 -; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: .LBB2_9: # %entry -; i686-NEXT: cmpb $64, %al -; i686-NEXT: jb .LBB2_10 -; i686-NEXT: # %bb.11: # %entry +; i686-NEXT: jne .LBB2_10 +; i686-NEXT: # %bb.9: # %entry +; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: .LBB2_10: # %entry +; i686-NEXT: cmpb $64, %ah +; i686-NEXT: jb .LBB2_11 +; i686-NEXT: # %bb.12: # %entry ; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: jmp .LBB2_12 -; i686-NEXT: .LBB2_10: +; i686-NEXT: jmp .LBB2_13 +; i686-NEXT: .LBB2_11: ; i686-NEXT: movl (%esp), %ecx # 4-byte Reload -; i686-NEXT: orl %ebp, %ecx -; i686-NEXT: .LBB2_12: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: .LBB2_13: # %entry ; i686-NEXT: movl %ecx, (%esp) # 4-byte Spill -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: jne .LBB2_14 -; i686-NEXT: # %bb.13: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB2_14: # %entry -; i686-NEXT: movl %edx, %esi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shldl %cl, %ebp, %esi ; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB2_16 -; i686-NEXT: # %bb.15: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB2_16: # %entry -; i686-NEXT: movb %ah, %cl +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: jne .LBB2_15 +; i686-NEXT: # %bb.14: # %entry +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB2_15: # %entry +; i686-NEXT: movl %ecx, %ebx +; i686-NEXT: movl %eax, %ecx ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shldl %cl, %esi, %edi -; i686-NEXT: testb $32, %ah -; 
i686-NEXT: jne .LBB2_18 -; i686-NEXT: # %bb.17: # %entry -; i686-NEXT: movl %edi, %ebx -; i686-NEXT: .LBB2_18: # %entry -; i686-NEXT: cmpb $64, %al +; i686-NEXT: shldl %cl, %esi, %ebx +; i686-NEXT: testb $32, %al ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: jae .LBB2_20 -; i686-NEXT: # %bb.19: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload -; i686-NEXT: .LBB2_20: # %entry +; i686-NEXT: jne .LBB2_17 +; i686-NEXT: # %bb.16: # %entry +; i686-NEXT: movl %ebx, %esi +; i686-NEXT: .LBB2_17: # %entry +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: shldl %cl, %ebx, %edi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB2_19 +; i686-NEXT: # %bb.18: # %entry +; i686-NEXT: movl %edi, %ebp +; i686-NEXT: .LBB2_19: # %entry +; i686-NEXT: cmpb $64, %ah +; i686-NEXT: jae .LBB2_21 +; i686-NEXT: # %bb.20: +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; i686-NEXT: movl %esi, %ebp +; i686-NEXT: .LBB2_21: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx ; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB2_22 -; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: movl %ebx, %edx -; i686-NEXT: movl (%esp), %ebp # 4-byte Reload -; i686-NEXT: .LBB2_22: # %entry +; i686-NEXT: testb %ah, %ah +; i686-NEXT: je .LBB2_23 +; i686-NEXT: # %bb.22: # %entry +; i686-NEXT: movl %ebp, %edx +; i686-NEXT: movl (%esp), %eax # 4-byte Reload +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB2_23: # %entry ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload ; i686-NEXT: movl %eax, 4(%ecx) -; i686-NEXT: movl %esi, (%ecx) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, (%ecx) ; i686-NEXT: movl %edx, 12(%ecx) -; i686-NEXT: movl %ebp, 8(%ecx) -; i686-NEXT: addl $20, %esp +; i686-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, 8(%ecx) +; i686-NEXT: addl $28, %esp ; i686-NEXT: popl %esi ; i686-NEXT: popl %edi ; i686-NEXT: popl %ebx @@ -464,259 +500,281 @@ ; i686-NEXT: pushl %ebx ; i686-NEXT: pushl %edi ; i686-NEXT: pushl %esi -; i686-NEXT: subl $68, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: subl $72, %esp +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx ; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl %ebx, %edi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shrl %cl, %edi -; i686-NEXT: movl %esi, %ebp +; i686-NEXT: movb $64, %dl +; i686-NEXT: movb $64, %ch +; i686-NEXT: subb %al, %ch +; i686-NEXT: movl %esi, %eax +; i686-NEXT: movb %ch, %cl +; i686-NEXT: shll %cl, %eax +; i686-NEXT: testb $32, %ch +; i686-NEXT: movl $0, %esi +; i686-NEXT: jne .LBB6_2 +; i686-NEXT: # %bb.1: # %entry +; i686-NEXT: movl %eax, %esi +; i686-NEXT: .LBB6_2: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edi, %ebp +; i686-NEXT: movb %ch, %cl ; i686-NEXT: shrl %cl, %ebp -; i686-NEXT: shrdl %cl, %esi, %edx -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB6_1 -; i686-NEXT: # %bb.2: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB6_3 -; i686-NEXT: .LBB6_1: -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB6_3: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: shrdl %cl, %ebx, %esi -; i686-NEXT: testb $32, %al -; i686-NEXT: movl 
{{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: jne .LBB6_5 -; i686-NEXT: # %bb.4: # %entry -; i686-NEXT: movl %esi, %edi -; i686-NEXT: .LBB6_5: # %entry +; i686-NEXT: testb $32, %ch +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl $0, %esi +; i686-NEXT: jne .LBB6_4 +; i686-NEXT: # %bb.3: # %entry +; i686-NEXT: movl %ebp, %esi +; i686-NEXT: .LBB6_4: # %entry +; i686-NEXT: movb %ch, %cl +; i686-NEXT: shrl %cl, %eax +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb $32, %ch +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl $0, %eax +; i686-NEXT: jne .LBB6_6 +; i686-NEXT: # %bb.5: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: .LBB6_6: # %entry +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: subb %al, %dl ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movb %dl, %cl +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl $0, %eax +; i686-NEXT: jne .LBB6_8 +; i686-NEXT: # %bb.7: # %entry +; i686-NEXT: movl %esi, %eax +; i686-NEXT: .LBB6_8: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %esi +; i686-NEXT: movb %ch, %cl +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: shrdl %cl, %eax, %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: subl $64, %eax ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shrl %cl, %ebx -; i686-NEXT: shrl %cl, %ebp -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: subl $64, %ecx -; i686-NEXT: movl %ecx, 
{{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx +; i686-NEXT: sbbl $0, %ebx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: sbbl $0, %ebx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: sbbl $0, %ebx ; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; i686-NEXT: testb $32, %dl -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB6_7 -; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: .LBB6_7: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: shrdl %cl, %ebp, %esi -; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB6_9 -; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %esi, %ebx -; i686-NEXT: .LBB6_9: # %entry +; i686-NEXT: testb $32, %ch +; i686-NEXT: jne .LBB6_10 +; i686-NEXT: # %bb.9: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB6_10: # %entry ; i686-NEXT: movl %edi, %esi -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: shrl %cl, %ebp -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB6_11 -; i686-NEXT: # %bb.10: # %entry -; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: .LBB6_11: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movb $64, %cl -; i686-NEXT: subb %dl, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movb %ch, %cl ; i686-NEXT: movl 
{{[0-9]+}}(%esp), %ebx -; i686-NEXT: shldl %cl, %ebx, %edi -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, %edi -; i686-NEXT: shll %cl, %edi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movb $64, %bl +; i686-NEXT: shldl %cl, %ebx, %esi +; i686-NEXT: testb $32, %ch ; i686-NEXT: jne .LBB6_12 +; i686-NEXT: # %bb.11: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB6_12: # %entry +; i686-NEXT: movl %ebx, %esi +; i686-NEXT: movb %ch, %cl +; i686-NEXT: shrdl %cl, %edi, %esi +; i686-NEXT: testb $32, %ch +; i686-NEXT: jne .LBB6_14 ; i686-NEXT: # %bb.13: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB6_14 -; i686-NEXT: .LBB6_12: -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl %esi, %ebp ; i686-NEXT: .LBB6_14: # %entry -; i686-NEXT: movl %esi, %edi ; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi ; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: shrdl %cl, %ebp, %esi +; i686-NEXT: shrl %cl, %esi ; i686-NEXT: testb $32, %dl +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl $0, %ecx ; i686-NEXT: jne .LBB6_16 ; i686-NEXT: # %bb.15: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, %ecx ; i686-NEXT: .LBB6_16: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: subb %al, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: testb $32, %bl +; i686-NEXT: movl %ebp, %ebx +; i686-NEXT: movl 
%edx, %ecx +; i686-NEXT: shll %cl, %ebx +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb $32, %dl ; i686-NEXT: movl $0, %ecx ; i686-NEXT: jne .LBB6_18 ; i686-NEXT: # %bb.17: # %entry -; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; i686-NEXT: .LBB6_18: # %entry ; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: subl $64, %ecx -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: setae %bh -; i686-NEXT: jb .LBB6_20 +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: shrdl %cl, %ebx, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB6_20 ; i686-NEXT: # %bb.19: # %entry -; i686-NEXT: xorl %edi, %edi -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB6_20: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrdl %cl, %esi, %edi -; i686-NEXT: shrl %cl, %esi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shldl %cl, %ebp, %esi +; i686-NEXT: testb $32, %dl ; i686-NEXT: jne .LBB6_22 ; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB6_22: # %entry -; i686-NEXT: testb %bh, %bh +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: shrl %cl, %esi +; i686-NEXT: testb $32, %al +; i686-NEXT: movl $0, %ecx ; i686-NEXT: jne .LBB6_24 -; i686-NEXT: # 
%bb.23: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: # %bb.23: # %entry +; i686-NEXT: movl %esi, %ecx ; i686-NEXT: .LBB6_24: # %entry -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl $0, %ecx +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebp, %ebx +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shrdl %cl, %esi, %ebx +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi ; i686-NEXT: jne .LBB6_26 ; i686-NEXT: # %bb.25: # %entry -; i686-NEXT: movl %esi, %ecx +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB6_26: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx ; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shldl %cl, %edi, %esi -; i686-NEXT: testb $32, %bl -; i686-NEXT: jne .LBB6_28 +; i686-NEXT: subl $64, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: sbbl $0, %edx +; i686-NEXT: movl %esi, %edx +; i686-NEXT: sbbl $0, %edx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: sbbl $0, %edx +; i686-NEXT: setae %dl +; i686-NEXT: jb .LBB6_28 ; i686-NEXT: # %bb.27: # %entry -; i686-NEXT: movl %esi, %ebp +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill ; i686-NEXT: .LBB6_28: # %entry -; i686-NEXT: testb %bh, %bh ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shrdl %cl, %edi, %esi +; i686-NEXT: shrl %cl, %edi +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: jne .LBB6_30 -; i686-NEXT: # %bb.29: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: 
orl %ebp, %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: # %bb.29: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB6_30: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload +; i686-NEXT: testb %dl, %dl ; i686-NEXT: jne .LBB6_32 -; i686-NEXT: # %bb.31: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: # %bb.31: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB6_32: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shrdl %cl, %ebp, %edi -; i686-NEXT: movl %edi, %ebp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload ; i686-NEXT: testb $32, %cl -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: je .LBB6_33 -; i686-NEXT: # %bb.34: # %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB6_35 -; i686-NEXT: .LBB6_36: # %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: je .LBB6_38 -; i686-NEXT: .LBB6_37: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: movl $0, %ecx +; i686-NEXT: jne .LBB6_34 +; i686-NEXT: # %bb.33: # %entry +; i686-NEXT: movl %edi, %ecx +; i686-NEXT: .LBB6_34: # %entry ; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb %dl, %dl +; i686-NEXT: movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload +; i686-NEXT: je .LBB6_35 +; i686-NEXT: # %bb.36: # 
%entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb %ch, %ch +; i686-NEXT: je .LBB6_38 +; i686-NEXT: jmp .LBB6_39 +; i686-NEXT: .LBB6_35: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload +; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb %ch, %ch +; i686-NEXT: jne .LBB6_39 ; i686-NEXT: .LBB6_38: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB6_39: # %entry +; i686-NEXT: movb %al, %cl +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: shrdl %cl, %edx, %ebp +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB6_41 +; i686-NEXT: # %bb.40: # %entry +; i686-NEXT: movl %ebp, %esi +; i686-NEXT: .LBB6_41: # %entry +; i686-NEXT: testb %ch, %ch +; i686-NEXT: je .LBB6_43 +; i686-NEXT: # %bb.42: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; i686-NEXT: .LBB6_43: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload +; i686-NEXT: testb %ch, %ch +; i686-NEXT: jne .LBB6_44 +; i686-NEXT: # %bb.45: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: jmp .LBB6_46 +; i686-NEXT: .LBB6_44: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB6_46: # %entry ; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx ; i686-NEXT: orl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl %ecx, %edx -; i686-NEXT: je .LBB6_40 -; i686-NEXT: # %bb.39: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; i686-NEXT: .LBB6_40: # %entry -; i686-NEXT: movl 
{{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %eax -; i686-NEXT: orl %edx, %eax ; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: je .LBB6_42 -; i686-NEXT: # %bb.41: # %entry +; i686-NEXT: orl {{[0-9]+}}(%esp), %eax +; i686-NEXT: orl %ecx, %eax +; i686-NEXT: je .LBB6_48 +; i686-NEXT: # %bb.47: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB6_48: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: orl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: orl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: orl %ecx, %ebx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: je .LBB6_50 +; i686-NEXT: # %bb.49: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: .LBB6_50: # %entry ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: .LBB6_42: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 28(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 24(%ecx) +; i686-NEXT: movl %esi, 28(%eax) +; i686-NEXT: movl %edx, 24(%eax) ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 12(%ecx) +; i686-NEXT: movl %edx, 12(%eax) ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 8(%ecx) -; i686-NEXT: movl %esi, 20(%ecx) -; i686-NEXT: movl %eax, 16(%ecx) -; i686-NEXT: movl %ebx, 4(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, (%ecx) -; i686-NEXT: addl $68, %esp +; i686-NEXT: movl %edx, 8(%eax) +; 
i686-NEXT: movl %ecx, 20(%eax) +; i686-NEXT: movl %edi, 16(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, 4(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, (%eax) +; i686-NEXT: addl $72, %esp ; i686-NEXT: popl %esi ; i686-NEXT: popl %edi ; i686-NEXT: popl %ebx ; i686-NEXT: popl %ebp ; i686-NEXT: retl -; i686-NEXT: .LBB6_33: # %entry -; i686-NEXT: movl %ebp, %edi -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: je .LBB6_36 -; i686-NEXT: .LBB6_35: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload -; i686-NEXT: movl %ecx, %edi -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB6_37 -; i686-NEXT: jmp .LBB6_38 ; ; x86_64-LABEL: test_lshr_v2i128: ; x86_64: # %bb.0: # %entry @@ -756,261 +814,289 @@ ; i686-NEXT: pushl %edi ; i686-NEXT: pushl %esi ; i686-NEXT: subl $80, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi ; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl %ebp, %ebx -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: sarl %cl, %ebx -; i686-NEXT: movl %esi, %edi -; i686-NEXT: shrl %cl, %edi -; i686-NEXT: shrdl %cl, %esi, %edx -; i686-NEXT: sarl $31, %ebp +; i686-NEXT: movb $64, %dl +; i686-NEXT: movb $64, %ch +; i686-NEXT: subb %al, %ch +; i686-NEXT: movl %edi, %eax +; i686-NEXT: movb %ch, %cl +; i686-NEXT: shll %cl, %eax ; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, %al -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jne .LBB7_1 -; i686-NEXT: # %bb.2: # %entry -; i686-NEXT: movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb $32, %ch +; i686-NEXT: movl $0, %edi +; i686-NEXT: jne 
.LBB7_2 +; i686-NEXT: # %bb.1: # %entry +; i686-NEXT: movl %eax, %edi +; i686-NEXT: .LBB7_2: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, %ebp +; i686-NEXT: movb %ch, %cl +; i686-NEXT: sarl %cl, %ebp +; i686-NEXT: movl %esi, %edi +; i686-NEXT: sarl $31, %edi +; i686-NEXT: testb $32, %ch +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax ; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jne .LBB7_4 +; i686-NEXT: # %bb.3: # %entry +; i686-NEXT: movl %ebp, %edi +; i686-NEXT: .LBB7_4: # %entry +; i686-NEXT: movb %ch, %cl +; i686-NEXT: shrl %cl, %eax +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb $32, %ch +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx ; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB7_3 -; i686-NEXT: .LBB7_1: +; i686-NEXT: movl $0, %eax +; i686-NEXT: jne .LBB7_6 +; i686-NEXT: # %bb.5: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: .LBB7_6: # %entry +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: subb %al, %dl +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movb %dl, %cl +; i686-NEXT: shrl %cl, %edi +; i686-NEXT: testb $32, %dl ; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: jne .LBB7_8 +; i686-NEXT: # %bb.7: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_8: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: movb %ch, %cl +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: shrdl %cl, %eax, %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: subl $64, %eax +; 
i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: sbbl $0, %ebx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: sbbl $0, %ebx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: sbbl $0, %ebx +; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; i686-NEXT: testb $32, %ch +; i686-NEXT: jne .LBB7_10 +; i686-NEXT: # %bb.9: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_10: # %entry +; i686-NEXT: movl %esi, %edi +; i686-NEXT: movb %ch, %cl +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: shldl %cl, %ebx, %edi +; i686-NEXT: testb $32, %ch +; i686-NEXT: jne .LBB7_12 +; i686-NEXT: # %bb.11: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_12: # %entry +; i686-NEXT: movl %ebx, %edi +; i686-NEXT: movb %ch, %cl +; i686-NEXT: shrdl %cl, %esi, %edi +; i686-NEXT: testb $32, %ch +; i686-NEXT: jne .LBB7_14 +; i686-NEXT: # %bb.13: # %entry +; i686-NEXT: movl %edi, %ebp +; i686-NEXT: .LBB7_14: # %entry ; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_3: # %entry ; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: shrdl %cl, %edx, %edi -; i686-NEXT: testb $32, %al -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: jne .LBB7_5 -; i686-NEXT: # %bb.4: # %entry ; i686-NEXT: movl %edi, %ebx -; i686-NEXT: .LBB7_5: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebp, %edi -; i686-NEXT: movl %edx, %ecx -; i686-NEXT: sarl %cl, %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrl %cl, %esi -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: sarl $31, %ebp ; i686-NEXT: movl %edx, %ecx -; i686-NEXT: subl $64, %ecx +; i686-NEXT: sarl %cl, %ebx +; i686-NEXT: movl %edi, %ecx +; i686-NEXT: 
sarl $31, %ecx +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi ; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; i686-NEXT: jne .LBB7_16 +; i686-NEXT: # %bb.15: # %entry +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: .LBB7_16: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %edi, %ebp +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shll %cl, %ebp ; i686-NEXT: testb $32, %dl -; i686-NEXT: movl $0, %esi +; i686-NEXT: jne .LBB7_18 +; i686-NEXT: # %bb.17: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_18: # %entry ; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: jne .LBB7_7 -; i686-NEXT: # %bb.6: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: movl %edi, %ecx -; i686-NEXT: .LBB7_7: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi ; i686-NEXT: movl %edx, %ecx ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: shrdl %cl, %ebp, %esi +; i686-NEXT: shrdl %cl, %ebp, %ebx ; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB7_9 -; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %esi, %edi -; i686-NEXT: .LBB7_9: # %entry +; i686-NEXT: jne .LBB7_20 +; i686-NEXT: # %bb.19: # %entry ; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload 
-; i686-NEXT: sarl %cl, %esi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: jne .LBB7_11 -; i686-NEXT: # %bb.10: # %entry -; i686-NEXT: movl %esi, %ecx -; i686-NEXT: .LBB7_11: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: movb $64, %cl -; i686-NEXT: subb %dl, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: .LBB7_20: # %entry ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shldl %cl, %ebx, %ebp -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ebx, %ebp -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: testb $32, %cl -; i686-NEXT: movb $64, %bl -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: je .LBB7_13 -; i686-NEXT: # %bb.12: -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: xorl %ebp, %ebp -; i686-NEXT: .LBB7_13: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shrdl %cl, %edi, %esi +; i686-NEXT: shldl %cl, %edi, %ebx ; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB7_15 -; i686-NEXT: # %bb.14: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_15: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: subb %al, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: testb $32, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: jne .LBB7_17 -; i686-NEXT: # %bb.16: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_17: # %entry -; i686-NEXT: movl %eax, %ecx -; i686-NEXT: subl $64, %ecx -; i686-NEXT: sbbl $0, %esi -; 
i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: sbbl $0, %esi -; i686-NEXT: setae %bh -; i686-NEXT: jb .LBB7_19 -; i686-NEXT: # %bb.18: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_19: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shrdl %cl, %edi, %esi -; i686-NEXT: sarl %cl, %edi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: je .LBB7_20 +; i686-NEXT: jne .LBB7_22 ; i686-NEXT: # %bb.21: # %entry -; i686-NEXT: testb %bh, %bh -; i686-NEXT: je .LBB7_22 -; i686-NEXT: .LBB7_23: # %entry -; i686-NEXT: testb $32, %cl -; i686-NEXT: jne .LBB7_25 +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_22: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: sarl %cl, %ebx +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb $32, %al +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; i686-NEXT: jne .LBB7_24 +; i686-NEXT: # %bb.23: # %entry +; i686-NEXT: movl %ebx, %eax ; i686-NEXT: .LBB7_24: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_25: # %entry -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shldl %cl, %esi, %edi -; i686-NEXT: testb $32, %bl -; i686-NEXT: jne .LBB7_27 -; i686-NEXT: # %bb.26: # %entry -; i686-NEXT: movl %edi, %ebp -; i686-NEXT: .LBB7_27: # %entry -; i686-NEXT: testb %bh, %bh -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: jne .LBB7_29 -; i686-NEXT: # %bb.28: -; i686-NEXT: movl 
{{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload -; i686-NEXT: orl %ebp, %ebx +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_29: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB7_31 -; i686-NEXT: # %bb.30: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB7_31: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl %edi, %ebx +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: shrdl %cl, %eax, %ebx +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: jne .LBB7_26 +; i686-NEXT: # %bb.25: # %entry +; i686-NEXT: movl %ebx, %ebp +; i686-NEXT: .LBB7_26: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shrdl %cl, %ebp, %ebx +; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: subl $64, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: sbbl $0, %edx +; i686-NEXT: movl %eax, %edx +; i686-NEXT: sbbl $0, %edx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: sbbl $0, %edx +; i686-NEXT: setae %dl +; i686-NEXT: jb .LBB7_28 +; i686-NEXT: # %bb.27: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_28: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: shrdl %cl, %esi, %ebp +; i686-NEXT: sarl %cl, %esi ; i686-NEXT: testb $32, %cl -; i686-NEXT: jne .LBB7_33 -; i686-NEXT: # %bb.32: # %entry -; i686-NEXT: movl %ebx, %esi -; i686-NEXT: .LBB7_33: 
# %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: jne .LBB7_30 +; i686-NEXT: # %bb.29: # %entry +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_30: # %entry +; i686-NEXT: testb %dl, %dl +; i686-NEXT: jne .LBB7_32 +; i686-NEXT: # %bb.31: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload +; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_32: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: jne .LBB7_34 +; i686-NEXT: # %bb.33: # %entry +; i686-NEXT: movl %esi, %ecx +; i686-NEXT: .LBB7_34: # %entry +; i686-NEXT: testb %dl, %dl ; i686-NEXT: je .LBB7_35 -; i686-NEXT: # %bb.34: +; i686-NEXT: # %bb.36: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl %ebx, %ecx -; i686-NEXT: movl %ecx, %esi -; i686-NEXT: .LBB7_35: # %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: je .LBB7_37 -; i686-NEXT: # %bb.36: +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jmp .LBB7_37 +; i686-NEXT: .LBB7_35: ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload ; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB7_37: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: orl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl %ecx, %edx -; i686-NEXT: je .LBB7_39 +; 
i686-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload +; i686-NEXT: testb %dl, %dl +; i686-NEXT: jne .LBB7_39 ; i686-NEXT: # %bb.38: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB7_39: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shrdl %cl, %esi, %edi +; i686-NEXT: testb $32, %al +; i686-NEXT: jne .LBB7_41 +; i686-NEXT: # %bb.40: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_41: # %entry +; i686-NEXT: testb %dl, %dl +; i686-NEXT: je .LBB7_43 +; i686-NEXT: # %bb.42: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_43: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: testb %dl, %dl +; i686-NEXT: jne .LBB7_44 +; i686-NEXT: # %bb.45: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jmp .LBB7_46 +; i686-NEXT: .LBB7_44: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_46: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; i686-NEXT: movl {{[0-9]+}}(%esp), %edx ; i686-NEXT: orl {{[0-9]+}}(%esp), %edx +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax ; i686-NEXT: orl {{[0-9]+}}(%esp), %eax ; i686-NEXT: orl %edx, %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: je .LBB7_41 -; i686-NEXT: # 
%bb.40: # %entry +; i686-NEXT: je .LBB7_48 +; i686-NEXT: # %bb.47: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: .LBB7_41: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 28(%ecx) -; i686-NEXT: movl %edi, 24(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 12(%ecx) +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB7_48: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: orl {{[0-9]+}}(%esp), %edx +; i686-NEXT: orl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: orl %edx, %ebx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: je .LBB7_50 +; i686-NEXT: # %bb.49: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 8(%ecx) -; i686-NEXT: movl %esi, 20(%ecx) -; i686-NEXT: movl %eax, 16(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, 4(%ecx) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, (%ecx) +; i686-NEXT: .LBB7_50: # %entry +; i686-NEXT: movl %esi, 28(%eax) +; i686-NEXT: movl %ecx, 24(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, 12(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, 8(%eax) +; i686-NEXT: movl %edx, 20(%eax) +; i686-NEXT: movl %edi, 16(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %ecx, 4(%eax) +; i686-NEXT: movl %ebp, (%eax) ; i686-NEXT: addl $80, %esp ; i686-NEXT: popl %esi ; i686-NEXT: popl %edi ; i686-NEXT: popl 
%ebx ; i686-NEXT: popl %ebp ; i686-NEXT: retl -; i686-NEXT: .LBB7_20: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: testb %bh, %bh -; i686-NEXT: jne .LBB7_23 -; i686-NEXT: .LBB7_22: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: testb $32, %cl -; i686-NEXT: je .LBB7_24 -; i686-NEXT: jmp .LBB7_25 ; ; x86_64-LABEL: test_ashr_v2i128: ; x86_64: # %bb.0: # %entry @@ -1052,260 +1138,262 @@ ; i686-NEXT: pushl %ebx ; i686-NEXT: pushl %edi ; i686-NEXT: pushl %esi -; i686-NEXT: subl $72, %esp -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: subl $68, %esp ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: shll %cl, %ebp -; i686-NEXT: shll %cl, %esi -; i686-NEXT: movl %edx, %eax -; i686-NEXT: subl $64, %eax -; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: sbbl $0, %eax ; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: sbbl $0, %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: sbbl $0, %eax -; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: testb $32, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl $0, %eax -; i686-NEXT: movl $0, %ecx +; i686-NEXT: movb $64, %dl +; i686-NEXT: movb $64, %ch +; i686-NEXT: subb %al, %ch +; i686-NEXT: movl %ebp, %eax +; i686-NEXT: movb %ch, %cl +; i686-NEXT: shll %cl, %eax +; i686-NEXT: testb $32, %ch +; i686-NEXT: movl $0, %esi ; i686-NEXT: jne .LBB8_2 ; i686-NEXT: # %bb.1: # %entry -; i686-NEXT: movl %esi, %eax -; i686-NEXT: movl %ebp, %ecx +; i686-NEXT: movl %eax, %esi ; i686-NEXT: .LBB8_2: # %entry -; i686-NEXT: 
movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %edi, %eax -; i686-NEXT: movl %ebx, %ecx ; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shldl %cl, %edi, %eax -; i686-NEXT: testb $32, %bl +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movb %ch, %cl +; i686-NEXT: shll %cl, %eax +; i686-NEXT: testb $32, %ch +; i686-NEXT: movl $0, %esi ; i686-NEXT: jne .LBB8_4 ; i686-NEXT: # %bb.3: # %entry ; i686-NEXT: movl %eax, %esi ; i686-NEXT: .LBB8_4: # %entry +; i686-NEXT: movb %ch, %cl +; i686-NEXT: shrl %cl, %edi +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: testb $32, %ch +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx +; i686-NEXT: movl $0, %edi +; i686-NEXT: jne .LBB8_6 +; i686-NEXT: # %bb.5: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: .LBB8_6: # %entry +; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movb $64, %cl -; i686-NEXT: subb %bl, %cl +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %edi, %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: subb %al, %dl +; i686-NEXT: movl %ebx, %esi +; i686-NEXT: movb %dl, %cl ; i686-NEXT: shrl %cl, %esi +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl $0, %eax +; i686-NEXT: jne .LBB8_8 +; i686-NEXT: # %bb.7: # %entry +; i686-NEXT: movl %esi, %eax +; i686-NEXT: .LBB8_8: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movb %ch, %cl ; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: shrdl %cl, %edi, %eax -; i686-NEXT: testb $32, %cl -; i686-NEXT: jne .LBB8_5 -; i686-NEXT: # 
%bb.6: # %entry +; i686-NEXT: shrdl %cl, %eax, %esi +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: subl $64, %eax +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: sbbl $0, %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: sbbl $0, %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: sbbl $0, %edi +; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill +; i686-NEXT: testb $32, %ch +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: jne .LBB8_10 +; i686-NEXT: # %bb.9: # %entry ; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: jmp .LBB8_7 -; i686-NEXT: .LBB8_5: -; i686-NEXT: movl %esi, %eax -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB8_7: # %entry +; i686-NEXT: .LBB8_10: # %entry +; i686-NEXT: movl %edi, %esi +; i686-NEXT: movb %ch, %cl +; i686-NEXT: shldl %cl, %ebp, %esi +; i686-NEXT: testb $32, %ch +; i686-NEXT: jne .LBB8_12 +; i686-NEXT: # %bb.11: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_12: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: movb %ch, %cl ; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl %ebx, %ecx +; i686-NEXT: shldl %cl, %edi, %esi +; i686-NEXT: testb $32, %ch +; i686-NEXT: jne .LBB8_14 +; i686-NEXT: # %bb.13: # %entry +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_14: # %entry ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shldl %cl, %esi, %edi -; i686-NEXT: testb $32, %bl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: jne .LBB8_9 -; i686-NEXT: # %bb.8: # %entry -; i686-NEXT: movl %edi, %ebp -; i686-NEXT: .LBB8_9: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl %ecx, %ebp ; i686-NEXT: movl %edx, %ecx -; i686-NEXT: shll %cl, 
%ebp -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi ; i686-NEXT: shll %cl, %esi ; i686-NEXT: testb $32, %dl -; i686-NEXT: movl $0, %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp ; i686-NEXT: movl $0, %ecx -; i686-NEXT: jne .LBB8_11 -; i686-NEXT: # %bb.10: # %entry -; i686-NEXT: movl %esi, %edi -; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: .LBB8_11: # %entry -; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: jne .LBB8_16 +; i686-NEXT: # %bb.15: # %entry +; i686-NEXT: movl %esi, %ecx +; i686-NEXT: .LBB8_16: # %entry ; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ebp, %esi ; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shldl %cl, %ebx, %edi +; i686-NEXT: shll %cl, %esi ; i686-NEXT: testb $32, %dl -; i686-NEXT: jne .LBB8_13 -; i686-NEXT: # %bb.12: # %entry -; i686-NEXT: movl %edi, %ebp -; i686-NEXT: .LBB8_13: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movb $64, %cl -; i686-NEXT: subb %dl, %cl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shrl %cl, %ebx -; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill -; i686-NEXT: testb $32, %cl ; i686-NEXT: movl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: jne .LBB8_15 -; i686-NEXT: # %bb.14: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: .LBB8_15: # %entry +; i686-NEXT: jne .LBB8_18 +; i686-NEXT: # %bb.17: # %entry +; i686-NEXT: movl %esi, %ecx +; i686-NEXT: .LBB8_18: # %entry ; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl %edx, %ecx +; i686-NEXT: shrdl %cl, %ebx, %edi +; i686-NEXT: testb $32, %dl +; i686-NEXT: movl 
{{[0-9]+}}(%esp), %ecx +; i686-NEXT: jne .LBB8_20 +; i686-NEXT: # %bb.19: # %entry ; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_20: # %entry +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: movl %ecx, %edi ; i686-NEXT: movl %edx, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp ; i686-NEXT: shldl %cl, %ebp, %edi ; i686-NEXT: testb $32, %dl -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: jne .LBB8_17 -; i686-NEXT: # %bb.16: # %entry +; i686-NEXT: jne .LBB8_22 +; i686-NEXT: # %bb.21: # %entry ; i686-NEXT: movl %edi, %esi -; i686-NEXT: .LBB8_17: # %entry -; i686-NEXT: orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: movl %ebx, %eax -; i686-NEXT: subl $64, %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl %ebp, %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: sbbl $0, %ecx -; i686-NEXT: setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill -; i686-NEXT: jb .LBB8_19 -; i686-NEXT: # %bb.18: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB8_19: # %entry -; i686-NEXT: jb .LBB8_21 -; i686-NEXT: # %bb.20: # %entry -; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill -; i686-NEXT: .LBB8_21: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp -; i686-NEXT: movl %ebp, %ebx +; i686-NEXT: .LBB8_22: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl %eax, %edi ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: shll %cl, %ebx -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: shldl %cl, %ebp, %edi -; i686-NEXT: testb $32, %cl -; i686-NEXT: movl %ebx, %ecx -; i686-NEXT: jne .LBB8_23 -; i686-NEXT: # %bb.22: # %entry -; i686-NEXT: movl %edi, %ecx -; i686-NEXT: .LBB8_23: # %entry -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; 
i686-NEXT: movl %eax, %ecx ; i686-NEXT: shll %cl, %edi +; i686-NEXT: movl %ebx, %ebp +; i686-NEXT: shldl %cl, %eax, %ebp +; i686-NEXT: testb $32, %cl ; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: testb $32, %al -; i686-NEXT: movl $0, %edi -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; i686-NEXT: jne .LBB8_25 -; i686-NEXT: # %bb.24: # %entry -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload -; i686-NEXT: .LBB8_25: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB8_27 -; i686-NEXT: # %bb.26: # %entry +; i686-NEXT: jne .LBB8_24 +; i686-NEXT: # %bb.23: # %entry +; i686-NEXT: movl %ebp, %edi +; i686-NEXT: .LBB8_24: # %entry ; i686-NEXT: movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_27: # %entry -; i686-NEXT: movl %eax, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movl %edx, %ecx ; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movl {{[0-9]+}}(%esp), %esi -; i686-NEXT: shldl %cl, %edi, %esi -; i686-NEXT: testb $32, %al -; i686-NEXT: jne .LBB8_29 -; i686-NEXT: # %bb.28: # %entry +; i686-NEXT: shldl %cl, %edi, %ebx +; i686-NEXT: testb $32, %dl +; i686-NEXT: jne .LBB8_26 +; i686-NEXT: # %bb.25: # %entry +; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_26: # %entry ; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_29: # %entry -; i686-NEXT: cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %edi -; i686-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: subl $64, %ecx +; i686-NEXT: movl %ebp, %edx +; i686-NEXT: sbbl $0, %edx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: sbbl $0, %edx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edx +; i686-NEXT: sbbl $0, %edx 
+; i686-NEXT: setae %dl +; i686-NEXT: jb .LBB8_28 +; i686-NEXT: # %bb.27: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB8_28: # %entry ; i686-NEXT: movl {{[0-9]+}}(%esp), %esi +; i686-NEXT: shll %cl, %esi +; i686-NEXT: testb $32, %cl +; i686-NEXT: movl $0, %ebx ; i686-NEXT: jne .LBB8_30 -; i686-NEXT: # %bb.31: # %entry -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB8_32 -; i686-NEXT: .LBB8_33: # %entry -; i686-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: jne .LBB8_35 -; i686-NEXT: .LBB8_34: # %entry +; i686-NEXT: # %bb.29: # %entry +; i686-NEXT: movl %esi, %ebx +; i686-NEXT: .LBB8_30: # %entry +; i686-NEXT: testb %dl, %dl +; i686-NEXT: jne .LBB8_32 +; i686-NEXT: # %bb.31: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload +; i686-NEXT: .LBB8_32: # %entry ; i686-NEXT: movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_35: # %entry -; i686-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp ; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: shrdl %cl, %ebx, %esi +; i686-NEXT: shldl %cl, %ebp, %ebx ; i686-NEXT: testb $32, %cl -; i686-NEXT: jne .LBB8_37 +; i686-NEXT: jne .LBB8_34 +; i686-NEXT: # %bb.33: # %entry +; i686-NEXT: movl %ebx, %esi +; i686-NEXT: .LBB8_34: # %entry +; i686-NEXT: testb %dl, %dl +; i686-NEXT: movl {{[0-9]+}}(%esp), %ebp +; i686-NEXT: movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload +; i686-NEXT: je .LBB8_35 ; i686-NEXT: # %bb.36: # %entry -; i686-NEXT: movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_37: # %entry -; i686-NEXT: testb %al, %al -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: jne .LBB8_38 -; i686-NEXT: # %bb.39: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: 
movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; i686-NEXT: testb %al, %al -; i686-NEXT: jne .LBB8_41 -; i686-NEXT: jmp .LBB8_42 -; i686-NEXT: .LBB8_30: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: orl %ebp, %ecx -; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: testb %al, %al -; i686-NEXT: jne .LBB8_33 -; i686-NEXT: .LBB8_32: # %entry ; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: testb %cl, %cl +; i686-NEXT: je .LBB8_38 +; i686-NEXT: jmp .LBB8_39 +; i686-NEXT: .LBB8_35: +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload +; i686-NEXT: testb %cl, %cl +; i686-NEXT: jne .LBB8_39 +; i686-NEXT: .LBB8_38: # %entry ; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: .LBB8_39: # %entry ; i686-NEXT: testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload -; i686-NEXT: je .LBB8_34 -; i686-NEXT: jmp .LBB8_35 -; i686-NEXT: .LBB8_38: -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload -; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx -; i686-NEXT: testb %al, %al -; i686-NEXT: je .LBB8_42 -; i686-NEXT: .LBB8_41: +; i686-NEXT: movl $0, %eax +; i686-NEXT: jne .LBB8_41 +; i686-NEXT: # %bb.40: # %entry ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload -; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_42: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: orl {{[0-9]+}}(%esp), %eax -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl %eax, %edx -; i686-NEXT: je .LBB8_44 -; i686-NEXT: # %bb.43: # %entry -; i686-NEXT: movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_41: # %entry +; i686-NEXT: testb %cl, %cl +; i686-NEXT: je 
.LBB8_43 +; i686-NEXT: # %bb.42: ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload -; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill -; i686-NEXT: .LBB8_44: # %entry -; i686-NEXT: movl {{[0-9]+}}(%esp), %eax -; i686-NEXT: movl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %edx -; i686-NEXT: orl {{[0-9]+}}(%esp), %ebx -; i686-NEXT: orl %edx, %ebx -; i686-NEXT: je .LBB8_46 +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload +; i686-NEXT: .LBB8_43: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: testb %cl, %cl +; i686-NEXT: jne .LBB8_44 ; i686-NEXT: # %bb.45: # %entry +; i686-NEXT: movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill +; i686-NEXT: jmp .LBB8_46 +; i686-NEXT: .LBB8_44: ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload +; i686-NEXT: orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload +; i686-NEXT: movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill ; i686-NEXT: .LBB8_46: # %entry -; i686-NEXT: movl %esi, 20(%eax) ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload -; i686-NEXT: movl %edx, 16(%eax) +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload +; i686-NEXT: orl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: orl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: orl %edi, %ecx +; i686-NEXT: je .LBB8_48 +; i686-NEXT: # %bb.47: # %entry +; i686-NEXT: movl %eax, %ebp +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload +; i686-NEXT: movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill +; i686-NEXT: .LBB8_48: # %entry +; i686-NEXT: movl {{[0-9]+}}(%esp), %eax +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi +; i686-NEXT: orl {{[0-9]+}}(%esp), %edi +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: orl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: orl %edi, %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %ecx +; i686-NEXT: movl {{[0-9]+}}(%esp), %edi 
+; i686-NEXT: je .LBB8_50 +; i686-NEXT: # %bb.49: # %entry +; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload +; i686-NEXT: movl %esi, %edi +; i686-NEXT: .LBB8_50: # %entry +; i686-NEXT: movl %edx, 20(%eax) +; i686-NEXT: movl %ebx, 16(%eax) ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload ; i686-NEXT: movl %edx, 4(%eax) ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload @@ -1314,9 +1402,8 @@ ; i686-NEXT: movl %ecx, 24(%eax) ; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload ; i686-NEXT: movl %ecx, 12(%eax) -; i686-NEXT: movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload -; i686-NEXT: movl %ecx, 8(%eax) -; i686-NEXT: addl $72, %esp +; i686-NEXT: movl %ebp, 8(%eax) +; i686-NEXT: addl $68, %esp ; i686-NEXT: popl %esi ; i686-NEXT: popl %edi ; i686-NEXT: popl %ebx