diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -2790,6 +2790,17 @@
                                 Amt, DAG.getConstant(0, dl, ShTy),
                                 ISD::SETEQ);
 
+  SDValue NVTBitsM1 = DAG.getConstant(NVTBits - 1, dl, ShTy);
+
+  // Mask all shift amounts to be in a valid range.
+  // TODO: This is only really needed if the shift will be expanded to a libcall
+  // later. Otherwise an out of bounds shift produces poison, but the select
+  // won't let it propagate.
+  assert(isPowerOf2_32(NVTBits) && "Unexpected VT");
+  AmtExcess = DAG.getNode(ISD::AND, dl, ShTy, AmtExcess, NVTBitsM1);
+  AmtLack = DAG.getNode(ISD::AND, dl, ShTy, AmtLack, NVTBitsM1);
+  Amt = DAG.getNode(ISD::AND, dl, ShTy, Amt, NVTBitsM1);
+
   SDValue LoS, HiS, LoL, HiL;
   switch (N->getOpcode()) {
   default: llvm_unreachable("Unknown shift");
@@ -2833,8 +2844,7 @@
                       DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
 
     // Long: ShAmt >= NVTBits
-    HiL = DAG.getNode(ISD::SRA, dl, NVT, InH,             // Sign of Hi part.
-                      DAG.getConstant(NVTBits - 1, dl, ShTy));
+    HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, NVTBitsM1); // Sign of Hi part.
     LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
 
     Lo = DAG.getSelect(dl, NVT, isZero, InL,
diff --git a/llvm/test/CodeGen/AMDGPU/shift-i128.ll b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
--- a/llvm/test/CodeGen/AMDGPU/shift-i128.ll
+++ b/llvm/test/CodeGen/AMDGPU/shift-i128.ll
@@ -5,20 +5,20 @@
 ; GCN-LABEL: v_shl_i128_vv:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_sub_i32_e32 v7, vcc, 64, v4
-; GCN-NEXT:    v_lshl_b64 v[5:6], v[2:3], v4
-; GCN-NEXT:    v_lshr_b64 v[7:8], v[0:1], v7
+; GCN-NEXT:    v_sub_i32_e32 v9, vcc, 64, v4
+; GCN-NEXT:    v_lshr_b64 v[5:6], v[0:1], v9
+; GCN-NEXT:    v_lshl_b64 v[7:8], v[2:3], v9
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GCN-NEXT:    v_or_b32_e32 v7, v5, v7
+; GCN-NEXT:    v_or_b32_e32 v7, v7, v5
 ; GCN-NEXT:    v_subrev_i32_e32 v5, vcc, 64, v4
-; GCN-NEXT:    v_or_b32_e32 v8, v6, v8
+; GCN-NEXT:    v_or_b32_e32 v8, v8, v6
 ; GCN-NEXT:    v_lshl_b64 v[5:6], v[0:1], v5
 ; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GCN-NEXT:    v_lshl_b64 v[0:1], v[0:1], v9
 ; GCN-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
-; GCN-NEXT:    v_lshl_b64 v[0:1], v[0:1], v4
+; GCN-NEXT:    v_cndmask_b32_e32 v4, v6, v8, vcc
 ; GCN-NEXT:    v_cndmask_b32_e64 v2, v5, v2, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v5, v3, s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v4, v3, s[4:5]
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
@@ -30,20 +30,20 @@
 ; GCN-LABEL: v_lshr_i128_vv:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_sub_i32_e32 v7, vcc, 64, v4
-; GCN-NEXT:    v_lshr_b64 v[5:6], v[0:1], v4
-; GCN-NEXT:    v_lshl_b64 v[7:8], v[2:3], v7
+; GCN-NEXT:    v_sub_i32_e32 v9, vcc, 64, v4
+; GCN-NEXT:    v_lshl_b64 v[5:6], v[2:3], v9
+; GCN-NEXT:    v_lshr_b64 v[7:8], v[0:1], v9
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GCN-NEXT:    v_or_b32_e32 v7, v5, v7
+; GCN-NEXT:    v_or_b32_e32 v7, v7, v5
 ; GCN-NEXT:    v_subrev_i32_e32 v5, vcc, 64, v4
-; GCN-NEXT:    v_or_b32_e32 v8, v6, v8
+; GCN-NEXT:    v_or_b32_e32 v8, v8, v6
 ; GCN-NEXT:    v_lshr_b64 v[5:6], v[2:3], v5
 ; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
+; GCN-NEXT:    v_lshr_b64 v[2:3], v[2:3], v9
 ; GCN-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
-; GCN-NEXT:    v_lshr_b64 v[2:3], v[2:3], v4
+; GCN-NEXT:    v_cndmask_b32_e32 v4, v6, v8, vcc
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
@@ -56,20 +56,20 @@
 ; GCN-LABEL: v_ashr_i128_vv:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_sub_i32_e32 v7, vcc, 64, v4
-; GCN-NEXT:    v_lshr_b64 v[5:6], v[0:1], v4
-; GCN-NEXT:    v_lshl_b64 v[7:8], v[2:3], v7
+; GCN-NEXT:    v_sub_i32_e32 v9, vcc, 64, v4
+; GCN-NEXT:    v_lshl_b64 v[5:6], v[2:3], v9
+; GCN-NEXT:    v_lshr_b64 v[7:8], v[0:1], v9
 ; GCN-NEXT:    v_cmp_eq_u32_e64 s[4:5], 0, v4
-; GCN-NEXT:    v_or_b32_e32 v7, v5, v7
+; GCN-NEXT:    v_or_b32_e32 v7, v7, v5
 ; GCN-NEXT:    v_subrev_i32_e32 v5, vcc, 64, v4
-; GCN-NEXT:    v_or_b32_e32 v8, v6, v8
+; GCN-NEXT:    v_or_b32_e32 v8, v8, v6
 ; GCN-NEXT:    v_ashr_i64 v[5:6], v[2:3], v5
 ; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v4
 ; GCN-NEXT:    v_cndmask_b32_e32 v5, v5, v7, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v4, v6, v8, vcc
 ; GCN-NEXT:    v_cndmask_b32_e64 v0, v5, v0, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e32 v5, v6, v8, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v5, v1, s[4:5]
-; GCN-NEXT:    v_ashr_i64 v[4:5], v[2:3], v4
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v4, v1, s[4:5]
+; GCN-NEXT:    v_ashr_i64 v[4:5], v[2:3], v9
 ; GCN-NEXT:    v_ashrrev_i32_e32 v3, 31, v3
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, v3, v4, vcc
 ; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
@@ -125,15 +125,15 @@
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_sub_i32_e32 v1, vcc, 64, v0
+; GCN-NEXT:    v_subrev_i32_e32 v4, vcc, 64, v0
 ; GCN-NEXT:    v_lshr_b64 v[2:3], 17, v1
-; GCN-NEXT:    v_subrev_i32_e32 v1, vcc, 64, v0
-; GCN-NEXT:    v_lshl_b64 v[4:5], 17, v1
+; GCN-NEXT:    v_lshl_b64 v[4:5], 17, v4
 ; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v0
-; GCN-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
 ; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
-; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, v1, s[4:5]
-; GCN-NEXT:    v_lshl_b64 v[0:1], 17, v0
+; GCN-NEXT:    v_lshl_b64 v[0:1], 17, v1
+; GCN-NEXT:    v_cndmask_b32_e32 v2, v4, v2, vcc
 ; GCN-NEXT:    v_cndmask_b32_e32 v3, v5, v3, vcc
+; GCN-NEXT:    v_cndmask_b32_e64 v2, 0, v2, s[4:5]
 ; GCN-NEXT:    v_cndmask_b32_e64 v3, 0, v3, s[4:5]
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -146,8 +146,9 @@
 ; GCN-LABEL: v_lshr_i128_kv:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_sub_i32_e32 v1, vcc, 64, v0
 ; GCN-NEXT:    s_mov_b64 s[4:5], 0x41
-; GCN-NEXT:    v_lshr_b64 v[1:2], s[4:5], v0
+; GCN-NEXT:    v_lshr_b64 v[1:2], s[4:5], v1
 ; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v0
 ; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -166,7 +167,8 @@
 ; GCN-LABEL: v_ashr_i128_kv:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_lshr_b64 v[1:2], 33, v0
+; GCN-NEXT:    v_sub_i32_e32 v1, vcc, 64, v0
+; GCN-NEXT:    v_lshr_b64 v[1:2], 33, v1
 ; GCN-NEXT:    v_cmp_gt_u32_e32 vcc, 64, v0
 ; GCN-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -187,18 +189,18 @@
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    v_mov_b32_e32 v5, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_sub_i32 s5, s4, 64
-; GCN-NEXT:    s_sub_i32 s12, 64, s4
-; GCN-NEXT:    s_lshl_b64 s[6:7], s[2:3], s4
-; GCN-NEXT:    s_lshl_b64 s[8:9], s[0:1], s4
+; GCN-NEXT:    s_sub_i32 s5, 64, s4
+; GCN-NEXT:    s_sub_i32 s12, s4, 64
+; GCN-NEXT:    s_lshr_b64 s[6:7], s[0:1], s5
+; GCN-NEXT:    s_lshl_b64 s[8:9], s[2:3], s5
 ; GCN-NEXT:    s_lshl_b64 s[10:11], s[0:1], s5
-; GCN-NEXT:    s_lshr_b64 s[0:1], s[0:1], s12
-; GCN-NEXT:    s_or_b64 s[0:1], s[6:7], s[0:1]
+; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], s12
+; GCN-NEXT:    s_or_b64 s[6:7], s[8:9], s[6:7]
 ; GCN-NEXT:    s_cmp_lt_u32 s4, 64
-; GCN-NEXT:    s_cselect_b32 s0, s0, s10
-; GCN-NEXT:    s_cselect_b32 s1, s1, s11
-; GCN-NEXT:    s_cselect_b32 s5, s9, 0
-; GCN-NEXT:    s_cselect_b32 s6, s8, 0
+; GCN-NEXT:    s_cselect_b32 s0, s6, s0
+; GCN-NEXT:    s_cselect_b32 s1, s7, s1
+; GCN-NEXT:    s_cselect_b32 s5, s11, 0
+; GCN-NEXT:    s_cselect_b32 s6, s10, 0
 ; GCN-NEXT:    s_cmp_eq_u32 s4, 0
 ; GCN-NEXT:    s_cselect_b32 s1, s3, s1
 ; GCN-NEXT:    s_cselect_b32 s0, s2, s0
@@ -220,18 +222,18 @@
 ; GCN-NEXT:    v_mov_b32_e32 v4, 0
 ; GCN-NEXT:    v_mov_b32_e32 v5, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
-; GCN-NEXT:    s_sub_i32 s5, s4, 64
-; GCN-NEXT:    s_sub_i32 s12, 64, s4
-; GCN-NEXT:    s_lshr_b64 s[6:7], s[0:1], s4
-; GCN-NEXT:    s_lshr_b64 s[8:9], s[2:3], s4
+; GCN-NEXT:    s_sub_i32 s5, 64, s4
+; GCN-NEXT:    s_sub_i32 s12, s4, 64
+; GCN-NEXT:    s_lshl_b64 s[6:7], s[2:3], s5
+; GCN-NEXT:    s_lshr_b64 s[8:9], s[0:1], s5
 ; GCN-NEXT:    s_lshr_b64 s[10:11], s[2:3], s5
-; GCN-NEXT:    s_lshl_b64 s[2:3], s[2:3], s12
-; GCN-NEXT:    s_or_b64 s[2:3], s[6:7], s[2:3]
+; GCN-NEXT:    s_lshr_b64 s[2:3], s[2:3], s12
+; GCN-NEXT:    s_or_b64 s[6:7], s[8:9], s[6:7]
 ; GCN-NEXT:    s_cmp_lt_u32 s4, 64
-; GCN-NEXT:    s_cselect_b32 s2, s2, s10
-; GCN-NEXT:    s_cselect_b32 s3, s3, s11
-; GCN-NEXT:    s_cselect_b32 s5, s9, 0
-; GCN-NEXT:    s_cselect_b32 s6, s8, 0
+; GCN-NEXT:    s_cselect_b32 s2, s6, s2
+; GCN-NEXT:    s_cselect_b32 s3, s7, s3
+; GCN-NEXT:    s_cselect_b32 s5, s11, 0
+; GCN-NEXT:    s_cselect_b32 s6, s10, 0
 ; GCN-NEXT:    s_cmp_eq_u32 s4, 0
 ; GCN-NEXT:    s_cselect_b32 s1, s1, s3
 ; GCN-NEXT:    s_cselect_b32 s0, s0, s2
@@ -254,25 +256,25 @@
 ; GCN-NEXT:    v_mov_b32_e32 v5, 0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_sub_i32 s5, 64, s4
-; GCN-NEXT:    s_lshr_b64 s[6:7], s[0:1], s4
-; GCN-NEXT:    s_sub_i32 s10, s4, 64
-; GCN-NEXT:    s_lshl_b64 s[8:9], s[2:3], s5
-; GCN-NEXT:    s_ashr_i32 s12, s3, 31
-; GCN-NEXT:    s_ashr_i64 s[10:11], s[2:3], s10
-; GCN-NEXT:    s_or_b64 s[6:7], s[6:7], s[8:9]
-; GCN-NEXT:    s_ashr_i64 s[2:3], s[2:3], s4
+; GCN-NEXT:    s_sub_i32 s12, s4, 64
+; GCN-NEXT:    s_lshl_b64 s[6:7], s[2:3], s5
+; GCN-NEXT:    s_lshr_b64 s[8:9], s[0:1], s5
+; GCN-NEXT:    s_ashr_i64 s[10:11], s[2:3], s5
+; GCN-NEXT:    s_ashr_i64 s[12:13], s[2:3], s12
+; GCN-NEXT:    s_or_b64 s[6:7], s[8:9], s[6:7]
+; GCN-NEXT:    s_ashr_i32 s2, s3, 31
 ; GCN-NEXT:    s_cmp_lt_u32 s4, 64
-; GCN-NEXT:    s_cselect_b32 s3, s3, s12
-; GCN-NEXT:    s_cselect_b32 s2, s2, s12
-; GCN-NEXT:    s_cselect_b32 s5, s6, s10
-; GCN-NEXT:    s_cselect_b32 s6, s7, s11
+; GCN-NEXT:    s_cselect_b32 s3, s6, s12
+; GCN-NEXT:    s_cselect_b32 s5, s7, s13
+; GCN-NEXT:    s_cselect_b32 s6, s11, s2
+; GCN-NEXT:    s_cselect_b32 s2, s10, s2
 ; GCN-NEXT:    s_cmp_eq_u32 s4, 0
-; GCN-NEXT:    s_cselect_b32 s1, s1, s6
-; GCN-NEXT:    s_cselect_b32 s0, s0, s5
+; GCN-NEXT:    s_cselect_b32 s1, s1, s5
+; GCN-NEXT:    s_cselect_b32 s0, s0, s3
 ; GCN-NEXT:    v_mov_b32_e32 v0, s0
 ; GCN-NEXT:    v_mov_b32_e32 v1, s1
 ; GCN-NEXT:    v_mov_b32_e32 v2, s2
-; GCN-NEXT:    v_mov_b32_e32 v3, s3
+; GCN-NEXT:    v_mov_b32_e32 v3, s6
 ; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; GCN-NEXT:    s_endpgm
   %shift = ashr i128 %lhs, %rhs
@@ -284,42 +286,42 @@
 ; GCN-LABEL: v_shl_v2i128_vv:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_sub_i32_e32 v16, vcc, 64, v8
-; GCN-NEXT:    v_lshr_b64 v[16:17], v[0:1], v16
-; GCN-NEXT:    v_lshl_b64 v[18:19], v[2:3], v8
-; GCN-NEXT:    v_cmp_gt_u64_e32 vcc, 64, v[8:9]
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[10:11]
-; GCN-NEXT:    v_or_b32_e32 v11, v9, v11
-; GCN-NEXT:    v_subrev_i32_e64 v9, s[6:7], 64, v8
-; GCN-NEXT:    v_or_b32_e32 v19, v19, v17
-; GCN-NEXT:    v_or_b32_e32 v18, v18, v16
+; GCN-NEXT:    v_sub_i32_e32 v20, vcc, 64, v8
+; GCN-NEXT:    v_lshr_b64 v[16:17], v[0:1], v20
+; GCN-NEXT:    v_lshl_b64 v[18:19], v[2:3], v20
+; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GCN-NEXT:    v_or_b32_e32 v10, v8, v10
-; GCN-NEXT:    v_lshl_b64 v[16:17], v[0:1], v9
-; GCN-NEXT:    s_and_b64 vcc, s[4:5], vcc
+; GCN-NEXT:    v_cmp_gt_u64_e64 s[4:5], 64, v[8:9]
+; GCN-NEXT:    v_subrev_i32_e64 v8, s[6:7], 64, v8
+; GCN-NEXT:    v_or_b32_e32 v11, v9, v11
+; GCN-NEXT:    v_lshl_b64 v[8:9], v[0:1], v8
+; GCN-NEXT:    v_or_b32_e32 v17, v19, v17
+; GCN-NEXT:    v_or_b32_e32 v16, v18, v16
+; GCN-NEXT:    s_and_b64 vcc, vcc, s[4:5]
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[10:11]
-; GCN-NEXT:    v_cndmask_b32_e32 v9, v16, v18, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v2, v9, v2, s[4:5]
-; GCN-NEXT:    v_sub_i32_e64 v9, s[6:7], 64, v12
-; GCN-NEXT:    v_cndmask_b32_e32 v11, v17, v19, vcc
-; GCN-NEXT:    v_lshr_b64 v[9:10], v[4:5], v9
-; GCN-NEXT:    v_lshl_b64 v[16:17], v[6:7], v12
-; GCN-NEXT:    v_cndmask_b32_e64 v3, v11, v3, s[4:5]
-; GCN-NEXT:    v_or_b32_e32 v16, v16, v9
+; GCN-NEXT:    v_cndmask_b32_e32 v8, v8, v16, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v16, v9, v17, vcc
+; GCN-NEXT:    v_sub_i32_e64 v17, s[6:7], 64, v12
+; GCN-NEXT:    v_cndmask_b32_e64 v2, v8, v2, s[4:5]
+; GCN-NEXT:    v_lshr_b64 v[8:9], v[4:5], v17
+; GCN-NEXT:    v_lshl_b64 v[10:11], v[6:7], v17
+; GCN-NEXT:    v_cndmask_b32_e64 v3, v16, v3, s[4:5]
+; GCN-NEXT:    v_or_b32_e32 v10, v10, v8
 ; GCN-NEXT:    v_cmp_gt_u64_e64 s[4:5], 64, v[12:13]
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[14:15]
-; GCN-NEXT:    v_subrev_i32_e64 v9, s[8:9], 64, v12
-; GCN-NEXT:    v_or_b32_e32 v11, v17, v10
-; GCN-NEXT:    v_lshl_b64 v[9:10], v[4:5], v9
-; GCN-NEXT:    v_or_b32_e32 v15, v13, v15
-; GCN-NEXT:    v_or_b32_e32 v14, v12, v14
+; GCN-NEXT:    v_subrev_i32_e64 v8, s[8:9], 64, v12
+; GCN-NEXT:    v_or_b32_e32 v16, v11, v9
+; GCN-NEXT:    v_lshl_b64 v[8:9], v[4:5], v8
 ; GCN-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[14:15]
-; GCN-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
-; GCN-NEXT:    v_lshl_b64 v[0:1], v[0:1], v8
-; GCN-NEXT:    v_lshl_b64 v[4:5], v[4:5], v12
-; GCN-NEXT:    v_cndmask_b32_e64 v6, v9, v6, s[6:7]
-; GCN-NEXT:    v_cndmask_b32_e64 v9, v10, v11, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e64 v7, v9, v7, s[6:7]
+; GCN-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
+; GCN-NEXT:    v_or_b32_e32 v11, v13, v15
+; GCN-NEXT:    v_or_b32_e32 v10, v12, v14
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[10:11]
+; GCN-NEXT:    v_lshl_b64 v[0:1], v[0:1], v20
+; GCN-NEXT:    v_lshl_b64 v[4:5], v[4:5], v17
+; GCN-NEXT:    v_cndmask_b32_e64 v6, v8, v6, s[6:7]
+; GCN-NEXT:    v_cndmask_b32_e64 v8, v9, v16, s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e64 v7, v8, v7, s[6:7]
 ; GCN-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc
 ; GCN-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
 ; GCN-NEXT:    v_cndmask_b32_e64 v4, 0, v4, s[4:5]
@@ -333,42 +335,42 @@
 ; GCN-LABEL: v_lshr_v2i128_vv:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_sub_i32_e32 v16, vcc, 64, v8
-; GCN-NEXT:    v_lshl_b64 v[16:17], v[2:3], v16
-; GCN-NEXT:    v_lshr_b64 v[18:19], v[0:1], v8
-; GCN-NEXT:    v_cmp_gt_u64_e32 vcc, 64, v[8:9]
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[10:11]
-; GCN-NEXT:    v_or_b32_e32 v11, v9, v11
-; GCN-NEXT:    v_subrev_i32_e64 v9, s[6:7], 64, v8
-; GCN-NEXT:    v_or_b32_e32 v19, v19, v17
-; GCN-NEXT:    v_or_b32_e32 v18, v18, v16
+; GCN-NEXT:    v_sub_i32_e32 v20, vcc, 64, v8
+; GCN-NEXT:    v_lshl_b64 v[16:17], v[2:3], v20
+; GCN-NEXT:    v_lshr_b64 v[18:19], v[0:1], v20
+; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GCN-NEXT:    v_or_b32_e32 v10, v8, v10
-; GCN-NEXT:    v_lshr_b64 v[16:17], v[2:3], v9
-; GCN-NEXT:    s_and_b64 vcc, s[4:5], vcc
+; GCN-NEXT:    v_cmp_gt_u64_e64 s[4:5], 64, v[8:9]
+; GCN-NEXT:    v_subrev_i32_e64 v8, s[6:7], 64, v8
+; GCN-NEXT:    v_or_b32_e32 v11, v9, v11
+; GCN-NEXT:    v_lshr_b64 v[8:9], v[2:3], v8
+; GCN-NEXT:    v_or_b32_e32 v17, v19, v17
+; GCN-NEXT:    v_or_b32_e32 v16, v18, v16
+; GCN-NEXT:    s_and_b64 vcc, vcc, s[4:5]
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[10:11]
-; GCN-NEXT:    v_cndmask_b32_e32 v9, v16, v18, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v9, v0, s[4:5]
-; GCN-NEXT:    v_sub_i32_e64 v9, s[6:7], 64, v12
-; GCN-NEXT:    v_cndmask_b32_e32 v11, v17, v19, vcc
-; GCN-NEXT:    v_lshl_b64 v[9:10], v[6:7], v9
-; GCN-NEXT:    v_lshr_b64 v[16:17], v[4:5], v12
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v11, v1, s[4:5]
-; GCN-NEXT:    v_or_b32_e32 v16, v16, v9
+; GCN-NEXT:    v_cndmask_b32_e32 v8, v8, v16, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v16, v9, v17, vcc
+; GCN-NEXT:    v_sub_i32_e64 v17, s[6:7], 64, v12
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v8, v0, s[4:5]
+; GCN-NEXT:    v_lshl_b64 v[8:9], v[6:7], v17
+; GCN-NEXT:    v_lshr_b64 v[10:11], v[4:5], v17
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v16, v1, s[4:5]
+; GCN-NEXT:    v_or_b32_e32 v10, v10, v8
 ; GCN-NEXT:    v_cmp_gt_u64_e64 s[4:5], 64, v[12:13]
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[14:15]
-; GCN-NEXT:    v_subrev_i32_e64 v9, s[8:9], 64, v12
-; GCN-NEXT:    v_or_b32_e32 v11, v17, v10
-; GCN-NEXT:    v_lshr_b64 v[9:10], v[6:7], v9
-; GCN-NEXT:    v_or_b32_e32 v15, v13, v15
-; GCN-NEXT:    v_or_b32_e32 v14, v12, v14
+; GCN-NEXT:    v_subrev_i32_e64 v8, s[8:9], 64, v12
+; GCN-NEXT:    v_or_b32_e32 v16, v11, v9
+; GCN-NEXT:    v_lshr_b64 v[8:9], v[6:7], v8
 ; GCN-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[14:15]
-; GCN-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
-; GCN-NEXT:    v_lshr_b64 v[2:3], v[2:3], v8
-; GCN-NEXT:    v_lshr_b64 v[6:7], v[6:7], v12
-; GCN-NEXT:    v_cndmask_b32_e64 v4, v9, v4, s[6:7]
-; GCN-NEXT:    v_cndmask_b32_e64 v9, v10, v11, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
+; GCN-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
+; GCN-NEXT:    v_or_b32_e32 v11, v13, v15
+; GCN-NEXT:    v_or_b32_e32 v10, v12, v14
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[10:11]
+; GCN-NEXT:    v_lshr_b64 v[2:3], v[2:3], v20
+; GCN-NEXT:    v_lshr_b64 v[6:7], v[6:7], v17
+; GCN-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s[6:7]
+; GCN-NEXT:    v_cndmask_b32_e64 v8, v9, v16, s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e64 v5, v8, v5, s[6:7]
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, 0, v2, vcc
 ; GCN-NEXT:    v_cndmask_b32_e32 v3, 0, v3, vcc
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, 0, v6, s[4:5]
@@ -382,45 +384,45 @@
 ; GCN-LABEL: v_ashr_v2i128_vv:
 ; GCN:       ; %bb.0:
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GCN-NEXT:    v_sub_i32_e32 v16, vcc, 64, v8
-; GCN-NEXT:    v_lshl_b64 v[16:17], v[2:3], v16
-; GCN-NEXT:    v_lshr_b64 v[18:19], v[0:1], v8
-; GCN-NEXT:    v_cmp_gt_u64_e32 vcc, 64, v[8:9]
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[10:11]
-; GCN-NEXT:    v_or_b32_e32 v11, v9, v11
-; GCN-NEXT:    v_subrev_i32_e64 v9, s[6:7], 64, v8
-; GCN-NEXT:    v_or_b32_e32 v19, v19, v17
-; GCN-NEXT:    v_or_b32_e32 v18, v18, v16
+; GCN-NEXT:    v_sub_i32_e32 v20, vcc, 64, v8
+; GCN-NEXT:    v_lshl_b64 v[16:17], v[2:3], v20
+; GCN-NEXT:    v_lshr_b64 v[18:19], v[0:1], v20
+; GCN-NEXT:    v_cmp_eq_u64_e32 vcc, 0, v[10:11]
 ; GCN-NEXT:    v_or_b32_e32 v10, v8, v10
-; GCN-NEXT:    v_ashr_i64 v[16:17], v[2:3], v9
-; GCN-NEXT:    s_and_b64 vcc, s[4:5], vcc
+; GCN-NEXT:    v_cmp_gt_u64_e64 s[4:5], 64, v[8:9]
+; GCN-NEXT:    v_subrev_i32_e64 v8, s[6:7], 64, v8
+; GCN-NEXT:    v_or_b32_e32 v11, v9, v11
+; GCN-NEXT:    v_ashr_i64 v[8:9], v[2:3], v8
+; GCN-NEXT:    v_or_b32_e32 v17, v19, v17
+; GCN-NEXT:    v_or_b32_e32 v16, v18, v16
+; GCN-NEXT:    s_and_b64 vcc, vcc, s[4:5]
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[4:5], 0, v[10:11]
-; GCN-NEXT:    v_cndmask_b32_e32 v9, v16, v18, vcc
-; GCN-NEXT:    v_cndmask_b32_e64 v0, v9, v0, s[4:5]
-; GCN-NEXT:    v_sub_i32_e64 v9, s[6:7], 64, v12
-; GCN-NEXT:    v_cndmask_b32_e32 v11, v17, v19, vcc
-; GCN-NEXT:    v_lshl_b64 v[9:10], v[6:7], v9
-; GCN-NEXT:    v_lshr_b64 v[16:17], v[4:5], v12
-; GCN-NEXT:    v_cndmask_b32_e64 v1, v11, v1, s[4:5]
-; GCN-NEXT:    v_or_b32_e32 v16, v16, v9
+; GCN-NEXT:    v_cndmask_b32_e32 v8, v8, v16, vcc
+; GCN-NEXT:    v_cndmask_b32_e32 v16, v9, v17, vcc
+; GCN-NEXT:    v_sub_i32_e64 v17, s[6:7], 64, v12
+; GCN-NEXT:    v_cndmask_b32_e64 v0, v8, v0, s[4:5]
+; GCN-NEXT:    v_lshl_b64 v[8:9], v[6:7], v17
+; GCN-NEXT:    v_lshr_b64 v[10:11], v[4:5], v17
+; GCN-NEXT:    v_cndmask_b32_e64 v1, v16, v1, s[4:5]
+; GCN-NEXT:    v_or_b32_e32 v10, v10, v8
 ; GCN-NEXT:    v_cmp_gt_u64_e64 s[4:5], 64, v[12:13]
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[14:15]
-; GCN-NEXT:    v_subrev_i32_e64 v9, s[8:9], 64, v12
-; GCN-NEXT:    v_or_b32_e32 v11, v17, v10
-; GCN-NEXT:    v_ashr_i64 v[9:10], v[6:7], v9
-; GCN-NEXT:    v_or_b32_e32 v15, v13, v15
-; GCN-NEXT:    v_or_b32_e32 v14, v12, v14
+; GCN-NEXT:    v_subrev_i32_e64 v8, s[8:9], 64, v12
+; GCN-NEXT:    v_or_b32_e32 v16, v11, v9
+; GCN-NEXT:    v_ashr_i64 v[8:9], v[6:7], v8
 ; GCN-NEXT:    s_and_b64 s[4:5], s[6:7], s[4:5]
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[14:15]
-; GCN-NEXT:    v_cndmask_b32_e64 v9, v9, v16, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e64 v4, v9, v4, s[6:7]
-; GCN-NEXT:    v_cndmask_b32_e64 v9, v10, v11, s[4:5]
-; GCN-NEXT:    v_cndmask_b32_e64 v5, v9, v5, s[6:7]
-; GCN-NEXT:    v_ashr_i64 v[8:9], v[2:3], v8
+; GCN-NEXT:    v_cndmask_b32_e64 v8, v8, v10, s[4:5]
+; GCN-NEXT:    v_or_b32_e32 v11, v13, v15
+; GCN-NEXT:    v_or_b32_e32 v10, v12, v14
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[6:7], 0, v[10:11]
+; GCN-NEXT:    v_cndmask_b32_e64 v4, v8, v4, s[6:7]
+; GCN-NEXT:    v_cndmask_b32_e64 v8, v9, v16, s[4:5]
+; GCN-NEXT:    v_cndmask_b32_e64 v5, v8, v5, s[6:7]
+; GCN-NEXT:    v_ashr_i64 v[8:9], v[2:3], v20
 ; GCN-NEXT:    v_ashrrev_i32_e32 v3, 31, v3
 ; GCN-NEXT:    v_cndmask_b32_e32 v2, v3, v8, vcc
 ; GCN-NEXT:    v_cndmask_b32_e32 v3, v3, v9, vcc
-; GCN-NEXT:    v_ashr_i64 v[8:9], v[6:7], v12
+; GCN-NEXT:    v_ashr_i64 v[8:9], v[6:7], v17
 ; GCN-NEXT:    v_ashrrev_i32_e32 v7, 31, v7
 ; GCN-NEXT:    v_cndmask_b32_e64 v6, v7, v8, s[4:5]
 ; GCN-NEXT:    v_cndmask_b32_e64 v7, v7, v9, s[4:5]
@@ -441,59 +443,59 @@
 ; GCN-NEXT:    v_cmp_lt_u64_e64 s[16:17], s[8:9], 64
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[18:19], s[10:11], 0
 ; GCN-NEXT:    s_sub_i32 s22, 64, s8
-; GCN-NEXT:    s_sub_i32 s20, s8, 64
-; GCN-NEXT:    s_lshr_b64 s[22:23], s[0:1], s22
+; GCN-NEXT:    s_lshl_b64 s[20:21], s[0:1], s22
 ; GCN-NEXT:    s_and_b64 s[16:17], s[18:19], s[16:17]
-; GCN-NEXT:    s_lshl_b64 s[18:19], s[2:3], s8
-; GCN-NEXT:    s_lshl_b64 s[20:21], s[0:1], s20
-; GCN-NEXT:    s_or_b64 s[18:19], s[18:19], s[22:23]
-; GCN-NEXT:    s_and_b64 s[22:23], s[16:17], exec
-; GCN-NEXT:    s_cselect_b32 s19, s19, s21
-; GCN-NEXT:    s_or_b64 s[10:11], s[8:9], s[10:11]
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], 0
-; GCN-NEXT:    s_and_b64 s[22:23], s[10:11], exec
-; GCN-NEXT:    s_cselect_b32 s9, s3, s19
-; GCN-NEXT:    s_and_b64 s[22:23], s[16:17], exec
-; GCN-NEXT:    s_cselect_b32 s3, s18, s20
-; GCN-NEXT:    s_and_b64 s[10:11], s[10:11], exec
-; GCN-NEXT:    v_cmp_lt_u64_e64 s[10:11], s[12:13], 64
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[18:19], s[14:15], 0
-; GCN-NEXT:    s_cselect_b32 s22, s2, s3
-; GCN-NEXT:    s_and_b64 s[2:3], s[18:19], s[10:11]
-; GCN-NEXT:    s_sub_i32 s18, 64, s12
-; GCN-NEXT:    s_sub_i32 s10, s12, 64
-; GCN-NEXT:    s_lshr_b64 s[18:19], s[4:5], s18
-; GCN-NEXT:    s_lshl_b64 s[20:21], s[6:7], s12
-; GCN-NEXT:    s_lshl_b64 s[10:11], s[4:5], s10
+; GCN-NEXT:    s_and_b64 s[18:19], s[16:17], exec
+; GCN-NEXT:    s_cselect_b32 s23, s21, 0
+; GCN-NEXT:    s_cselect_b32 s24, s20, 0
+; GCN-NEXT:    s_lshr_b64 s[18:19], s[0:1], s22
+; GCN-NEXT:    s_lshl_b64 s[20:21], s[2:3], s22
 ; GCN-NEXT:    s_or_b64 s[18:19], s[20:21], s[18:19]
-; GCN-NEXT:    s_and_b64 s[20:21], s[2:3], exec
-; GCN-NEXT:    s_cselect_b32 s11, s19, s11
-; GCN-NEXT:    s_or_b64 s[14:15], s[12:13], s[14:15]
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[14:15], s[14:15], 0
-; GCN-NEXT:    s_and_b64 s[20:21], s[14:15], exec
-; GCN-NEXT:    s_cselect_b32 s13, s7, s11
-; GCN-NEXT:    s_and_b64 s[20:21], s[2:3], exec
-; GCN-NEXT:    s_cselect_b32 s7, s18, s10
-; GCN-NEXT:    s_and_b64 s[10:11], s[14:15], exec
-; GCN-NEXT:    s_cselect_b32 s10, s6, s7
-; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], s8
-; GCN-NEXT:    s_and_b64 s[6:7], s[16:17], exec
-; GCN-NEXT:    s_cselect_b32 s6, s1, 0
-; GCN-NEXT:    s_cselect_b32 s7, s0, 0
-; GCN-NEXT:    s_lshl_b64 s[0:1], s[4:5], s12
-; GCN-NEXT:    s_and_b64 s[2:3], s[2:3], exec
-; GCN-NEXT:    s_cselect_b32 s1, s1, 0
-; GCN-NEXT:    s_cselect_b32 s0, s0, 0
-; GCN-NEXT:    v_mov_b32_e32 v0, s0
-; GCN-NEXT:    v_mov_b32_e32 v1, s1
-; GCN-NEXT:    v_mov_b32_e32 v2, s10
-; GCN-NEXT:    v_mov_b32_e32 v3, s13
+; GCN-NEXT:    s_sub_i32 s20, s8, 64
+; GCN-NEXT:    s_lshl_b64 s[0:1], s[0:1], s20
+; GCN-NEXT:    s_and_b64 s[20:21], s[16:17], exec
+; GCN-NEXT:    s_cselect_b32 s1, s19, s1
+; GCN-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
+; GCN-NEXT:    s_and_b64 s[10:11], s[8:9], exec
+; GCN-NEXT:    s_cselect_b32 s19, s3, s1
+; GCN-NEXT:    s_and_b64 s[10:11], s[16:17], exec
+; GCN-NEXT:    s_cselect_b32 s3, s18, s0
+; GCN-NEXT:    s_and_b64 s[0:1], s[8:9], exec
+; GCN-NEXT:    v_cmp_lt_u64_e64 s[0:1], s[12:13], 64
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[8:9], s[14:15], 0
+; GCN-NEXT:    s_cselect_b32 s16, s2, s3
+; GCN-NEXT:    s_sub_i32 s10, 64, s12
+; GCN-NEXT:    s_and_b64 s[0:1], s[8:9], s[0:1]
+; GCN-NEXT:    s_lshl_b64 s[2:3], s[4:5], s10
+; GCN-NEXT:    s_and_b64 s[8:9], s[0:1], exec
+; GCN-NEXT:    s_cselect_b32 s17, s3, 0
+; GCN-NEXT:    s_cselect_b32 s18, s2, 0
+; GCN-NEXT:    s_lshr_b64 s[2:3], s[4:5], s10
+; GCN-NEXT:    s_lshl_b64 s[8:9], s[6:7], s10
+; GCN-NEXT:    s_or_b64 s[2:3], s[8:9], s[2:3]
+; GCN-NEXT:    s_sub_i32 s8, s12, 64
+; GCN-NEXT:    s_lshl_b64 s[4:5], s[4:5], s8
+; GCN-NEXT:    s_and_b64 s[8:9], s[0:1], exec
+; GCN-NEXT:    s_cselect_b32 s3, s3, s5
+; GCN-NEXT:    s_or_b64 s[8:9], s[12:13], s[14:15]
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
+; GCN-NEXT:    v_mov_b32_e32 v0, s18
+; GCN-NEXT:    s_and_b64 s[10:11], s[8:9], exec
+; GCN-NEXT:    s_cselect_b32 s3, s7, s3
+; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], exec
+; GCN-NEXT:    s_cselect_b32 s2, s2, s4
+; GCN-NEXT:    s_and_b64 s[0:1], s[8:9], exec
+; GCN-NEXT:    s_cselect_b32 s0, s6, s2
+; GCN-NEXT:    v_mov_b32_e32 v1, s17
+; GCN-NEXT:    v_mov_b32_e32 v2, s0
+; GCN-NEXT:    v_mov_b32_e32 v3, s3
 ; GCN-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
 ; GCN-NEXT:    s_nop 0
-; GCN-NEXT:    v_mov_b32_e32 v0, s7
-; GCN-NEXT:    v_mov_b32_e32 v1, s6
-; GCN-NEXT:    v_mov_b32_e32 v2, s22
-; GCN-NEXT:    v_mov_b32_e32 v3, s9
+; GCN-NEXT:    v_mov_b32_e32 v0, s24
+; GCN-NEXT:    v_mov_b32_e32 v1, s23
+; GCN-NEXT:    v_mov_b32_e32 v2, s16
+; GCN-NEXT:    v_mov_b32_e32 v3, s19
 ; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; GCN-NEXT:    s_endpgm
   %shift = shl <2 x i128> %lhs, %rhs
@@ -513,59 +515,59 @@
 ; GCN-NEXT:    v_cmp_lt_u64_e64 s[16:17], s[8:9], 64
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[18:19], s[10:11], 0
 ; GCN-NEXT:    s_sub_i32 s22, 64, s8
-; GCN-NEXT:    s_sub_i32 s20, s8, 64
-; GCN-NEXT:    s_lshl_b64 s[22:23], s[2:3], s22
+; GCN-NEXT:    s_lshr_b64 s[20:21], s[2:3], s22
 ; GCN-NEXT:    s_and_b64 s[16:17], s[18:19], s[16:17]
-; GCN-NEXT:    s_lshr_b64 s[18:19], s[0:1], s8
-; GCN-NEXT:    s_lshr_b64 s[20:21], s[2:3], s20
-; GCN-NEXT:    s_or_b64 s[18:19], s[18:19], s[22:23]
-; GCN-NEXT:    s_and_b64 s[22:23], s[16:17], exec
-; GCN-NEXT:    s_cselect_b32 s19, s19, s21
-; GCN-NEXT:    s_or_b64 s[10:11], s[8:9], s[10:11]
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], 0
-; GCN-NEXT:    s_and_b64 s[22:23], s[10:11], exec
-; GCN-NEXT:    s_cselect_b32 s9, s1, s19
-; GCN-NEXT:    s_and_b64 s[22:23], s[16:17], exec
-; GCN-NEXT:    s_cselect_b32 s1, s18, s20
-; GCN-NEXT:    s_and_b64 s[10:11], s[10:11], exec
-; GCN-NEXT:    v_cmp_lt_u64_e64 s[10:11], s[12:13], 64
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[18:19], s[14:15], 0
-; GCN-NEXT:    s_cselect_b32 s22, s0, s1
-; GCN-NEXT:    s_and_b64 s[0:1], s[18:19], s[10:11]
-; GCN-NEXT:    s_sub_i32 s18, 64, s12
-; GCN-NEXT:    s_sub_i32 s10, s12, 64
-; GCN-NEXT:    s_lshl_b64 s[18:19], s[6:7], s18
-; GCN-NEXT:    s_lshr_b64 s[20:21], s[4:5], s12
-; GCN-NEXT:    s_lshr_b64 s[10:11], s[6:7], s10
+; GCN-NEXT:    s_and_b64 s[18:19], s[16:17], exec
+; GCN-NEXT:    s_cselect_b32 s23, s21, 0
+; GCN-NEXT:    s_cselect_b32 s24, s20, 0
+; GCN-NEXT:    s_lshl_b64 s[18:19], s[2:3], s22
+; GCN-NEXT:    s_lshr_b64 s[20:21], s[0:1], s22
 ; GCN-NEXT:    s_or_b64 s[18:19], s[20:21], s[18:19]
-; GCN-NEXT:    s_and_b64 s[20:21], s[0:1], exec
-; GCN-NEXT:    s_cselect_b32 s11, s19, s11
-; GCN-NEXT:    s_or_b64 s[14:15], s[12:13], s[14:15]
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[14:15], s[14:15], 0
-; GCN-NEXT:    s_and_b64 s[20:21], s[14:15], exec
-; GCN-NEXT:    s_cselect_b32 s13, s5, s11
-; GCN-NEXT:    s_and_b64 s[20:21], s[0:1], exec
-; GCN-NEXT:    s_cselect_b32 s5, s18, s10
-; GCN-NEXT:    s_and_b64 s[10:11], s[14:15], exec
-; GCN-NEXT:    s_cselect_b32 s10, s4, s5
-; GCN-NEXT:    s_lshr_b64 s[2:3], s[2:3], s8
-; GCN-NEXT:    s_and_b64 s[4:5], s[16:17], exec
-; GCN-NEXT:    s_cselect_b32 s4, s3, 0
-; GCN-NEXT:    s_cselect_b32 s5, s2, 0
-; GCN-NEXT:    s_lshr_b64 s[2:3], s[6:7], s12
+; GCN-NEXT:    s_sub_i32 s20, s8, 64
+; GCN-NEXT:    s_lshr_b64 s[2:3], s[2:3], s20
+; GCN-NEXT:    s_and_b64 s[20:21], s[16:17], exec
+; GCN-NEXT:    s_cselect_b32 s3, s19, s3
+; GCN-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
+; GCN-NEXT:    s_and_b64 s[10:11], s[8:9], exec
+; GCN-NEXT:    s_cselect_b32 s19, s1, s3
+; GCN-NEXT:    s_and_b64 s[10:11], s[16:17], exec
+; GCN-NEXT:    s_cselect_b32 s1, s18, s2
+; GCN-NEXT:    s_and_b64 s[2:3], s[8:9], exec
+; GCN-NEXT:    v_cmp_lt_u64_e64 s[2:3], s[12:13], 64
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[8:9], s[14:15], 0
+; GCN-NEXT:    s_cselect_b32 s16, s0, s1
+; GCN-NEXT:    s_sub_i32 s10, 64, s12
+; GCN-NEXT:    s_and_b64 s[0:1], s[8:9], s[2:3]
+; GCN-NEXT:    s_lshr_b64 s[2:3], s[6:7], s10
+; GCN-NEXT:    s_and_b64 s[8:9], s[0:1], exec
+; GCN-NEXT:    s_cselect_b32 s17, s3, 0
+; GCN-NEXT:    s_cselect_b32 s18, s2, 0
+; GCN-NEXT:    s_lshl_b64 s[2:3], s[6:7], s10
+; GCN-NEXT:    s_lshr_b64 s[8:9], s[4:5], s10
+; GCN-NEXT:    s_or_b64 s[2:3], s[8:9], s[2:3]
+; GCN-NEXT:    s_sub_i32 s8, s12, 64
+; GCN-NEXT:    s_lshr_b64 s[6:7], s[6:7], s8
+; GCN-NEXT:    s_and_b64 s[8:9], s[0:1], exec
+; GCN-NEXT:    s_cselect_b32 s3, s3, s7
+; GCN-NEXT:    s_or_b64 s[8:9], s[12:13], s[14:15]
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
+; GCN-NEXT:    v_mov_b32_e32 v2, s18
+; GCN-NEXT:    s_and_b64 s[10:11], s[8:9], exec
+; GCN-NEXT:    s_cselect_b32 s3, s5, s3
 ; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], exec
-; GCN-NEXT:    s_cselect_b32 s0, s3, 0
-; GCN-NEXT:    s_cselect_b32 s1, s2, 0
-; GCN-NEXT:    v_mov_b32_e32 v0, s10
-; GCN-NEXT:    v_mov_b32_e32 v1, s13
-; GCN-NEXT:    v_mov_b32_e32 v2, s1
-; GCN-NEXT:    v_mov_b32_e32 v3, s0
+; GCN-NEXT:    s_cselect_b32 s2, s2, s6
+; GCN-NEXT:    s_and_b64 s[0:1], s[8:9], exec
+; GCN-NEXT:    s_cselect_b32 s0, s4, s2
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    v_mov_b32_e32 v1, s3
+; GCN-NEXT:    v_mov_b32_e32 v3, s17
 ; GCN-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
 ; GCN-NEXT:    s_nop 0
-; GCN-NEXT:    v_mov_b32_e32 v0, s22
-; GCN-NEXT:    v_mov_b32_e32 v1, s9
-; GCN-NEXT:    v_mov_b32_e32 v2, s5
-; GCN-NEXT:    v_mov_b32_e32 v3, s4
+; GCN-NEXT:    v_mov_b32_e32 v0, s16
+; GCN-NEXT:    v_mov_b32_e32 v1, s19
+; GCN-NEXT:    v_mov_b32_e32 v2, s24
+; GCN-NEXT:    v_mov_b32_e32 v3, s23
 ; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; GCN-NEXT:    s_endpgm
   %shift = lshr <2 x i128> %lhs, %rhs
@@ -585,61 +587,61 @@
 ; GCN-NEXT:    v_cmp_lt_u64_e64 s[16:17], s[8:9], 64
 ; GCN-NEXT:    v_cmp_eq_u64_e64 s[18:19], s[10:11], 0
 ; GCN-NEXT:    s_sub_i32 s22, 64, s8
-; GCN-NEXT:    s_sub_i32 s20, s8, 64
-; GCN-NEXT:    s_lshl_b64 s[22:23], s[2:3], s22
+; GCN-NEXT:    s_ashr_i32 s23, s3, 31
+; GCN-NEXT:    s_ashr_i64 s[20:21], s[2:3], s22
 ; GCN-NEXT:    s_and_b64 s[16:17], s[18:19], s[16:17]
-; GCN-NEXT:    s_lshr_b64 s[18:19], s[0:1], s8
-; GCN-NEXT:    s_ashr_i64 s[20:21], s[2:3], s20
-; GCN-NEXT:    s_or_b64 s[18:19], s[18:19], s[22:23]
-; GCN-NEXT:    s_and_b64 s[22:23], s[16:17], exec
-; GCN-NEXT:    s_cselect_b32 s19, s19, s21
-; GCN-NEXT:    s_or_b64 s[10:11], s[8:9], s[10:11]
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[10:11], s[10:11], 0
-; GCN-NEXT:    s_and_b64 s[22:23], s[10:11], exec
-; GCN-NEXT:    s_cselect_b32 s9, s1, s19
-; GCN-NEXT:    s_and_b64 s[22:23], s[16:17], exec
-; GCN-NEXT:    s_cselect_b32 s1, s18, s20
-; GCN-NEXT:    s_and_b64 s[10:11], s[10:11], exec
-; GCN-NEXT:    v_cmp_lt_u64_e64 s[10:11], s[12:13], 64
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[18:19], s[14:15], 0
-; GCN-NEXT:    s_cselect_b32 s22, s0, s1
-; GCN-NEXT:    s_and_b64 s[0:1], s[18:19], s[10:11]
-; GCN-NEXT:    s_sub_i32 s18, 64, s12
-; GCN-NEXT:    s_sub_i32 s10, s12, 64
-; GCN-NEXT:    s_lshl_b64 s[18:19], s[6:7], s18
-; GCN-NEXT:    s_lshr_b64 s[20:21], s[4:5], s12
-; GCN-NEXT:    s_ashr_i64 s[10:11], s[6:7], s10
+; GCN-NEXT:    s_and_b64 s[18:19], s[16:17], exec
+; GCN-NEXT:    s_cselect_b32 s24, s21, s23
+; GCN-NEXT:    s_cselect_b32 s23, s20, s23
+; GCN-NEXT:    s_lshl_b64 s[18:19], s[2:3], s22
+; GCN-NEXT:    s_lshr_b64 s[20:21], s[0:1], s22
 ; GCN-NEXT:    s_or_b64 s[18:19], s[20:21], s[18:19]
-; GCN-NEXT:    s_and_b64 s[20:21], s[0:1], exec
-; GCN-NEXT:    s_cselect_b32 s11, s19, s11
-; GCN-NEXT:    s_or_b64 s[14:15], s[12:13], s[14:15]
-; GCN-NEXT:    v_cmp_eq_u64_e64 s[14:15], s[14:15], 0
-; GCN-NEXT:    s_and_b64 s[20:21], s[14:15], exec
-; GCN-NEXT:    s_cselect_b32 s13, s5, s11
-; GCN-NEXT:    s_and_b64 s[20:21], s[0:1], exec
-; GCN-NEXT:    s_cselect_b32 s5, s18, s10
-; GCN-NEXT:    s_and_b64 s[10:11], s[14:15], exec
-; GCN-NEXT:    s_cselect_b32 s10, s4, s5
-; GCN-NEXT:    s_ashr_i32 s11, s3, 31
-; GCN-NEXT:    s_ashr_i64 s[2:3], s[2:3], s8
-; GCN-NEXT:    s_and_b64 s[4:5], s[16:17], exec
-; GCN-NEXT:    s_cselect_b32 s4, s3, s11
-; GCN-NEXT:    s_cselect_b32 s5, s2, s11
-; GCN-NEXT:    s_ashr_i32 s8, s7, 31
-; GCN-NEXT:    s_ashr_i64 s[2:3], s[6:7], s12
+; GCN-NEXT:    s_sub_i32 s20, s8, 64
+; GCN-NEXT:    s_ashr_i64 s[2:3], s[2:3], s20
+; GCN-NEXT:    s_and_b64 s[20:21], s[16:17], exec
+; GCN-NEXT:    s_cselect_b32 s3, s19, s3
+; GCN-NEXT:    s_or_b64 s[8:9], s[8:9], s[10:11]
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
+; GCN-NEXT:    s_and_b64 s[10:11], s[8:9], exec
+; GCN-NEXT:    s_cselect_b32 s19, s1, s3
+; GCN-NEXT:    s_and_b64 s[10:11], s[16:17], exec
+; GCN-NEXT:    s_cselect_b32 s1, s18, s2
+; GCN-NEXT:    s_and_b64 s[2:3], s[8:9], exec
+; GCN-NEXT:    v_cmp_lt_u64_e64 s[2:3], s[12:13], 64
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[8:9], s[14:15], 0
+; GCN-NEXT:    s_cselect_b32 s16, s0, s1
+; GCN-NEXT:    s_sub_i32 s10, 64, s12
+; GCN-NEXT:    s_and_b64 s[0:1], s[8:9], s[2:3]
+; GCN-NEXT:    s_ashr_i64 s[2:3], s[6:7], s10
+; GCN-NEXT:    s_ashr_i32 s11, s7, 31
+; GCN-NEXT:    s_and_b64 s[8:9], s[0:1], exec
+; GCN-NEXT:    s_cselect_b32 s17, s3, s11
+; GCN-NEXT:    s_cselect_b32 s18, s2, s11
+; GCN-NEXT:    s_lshl_b64 s[2:3], s[6:7], s10
+; GCN-NEXT:    s_lshr_b64 s[8:9], s[4:5], s10
+; GCN-NEXT:    s_or_b64 s[2:3], s[8:9], s[2:3]
+; GCN-NEXT:    s_sub_i32 s8, s12, 64
+; GCN-NEXT:    s_ashr_i64 s[6:7], s[6:7], s8
+; GCN-NEXT:    s_and_b64 s[8:9], s[0:1], exec
+; GCN-NEXT:    s_cselect_b32 s3, s3, s7
+; GCN-NEXT:    s_or_b64 s[8:9], s[12:13], s[14:15]
+; GCN-NEXT:    v_cmp_eq_u64_e64 s[8:9], s[8:9], 0
+; GCN-NEXT:    v_mov_b32_e32 v2, s18
+; GCN-NEXT:    s_and_b64 s[10:11], s[8:9], exec
+; GCN-NEXT:    s_cselect_b32 s3, s5, s3
 ; GCN-NEXT:    s_and_b64 s[0:1], s[0:1], exec
-; GCN-NEXT:    s_cselect_b32 s0, s3, s8
-; GCN-NEXT:    s_cselect_b32 s1, s2, s8
-; GCN-NEXT:    v_mov_b32_e32 v0, s10
-; GCN-NEXT:    v_mov_b32_e32 v1, s13
-; GCN-NEXT:    v_mov_b32_e32 v2, s1
-; GCN-NEXT:    v_mov_b32_e32 v3, s0
+; GCN-NEXT:    s_cselect_b32 s2, s2, s6
+; GCN-NEXT:    s_and_b64 s[0:1], s[8:9], exec
+; GCN-NEXT:    s_cselect_b32 s0, s4, s2
+; GCN-NEXT:    v_mov_b32_e32 v0, s0
+; GCN-NEXT:    v_mov_b32_e32 v1, s3
+; GCN-NEXT:    v_mov_b32_e32 v3, s17
 ; GCN-NEXT:    flat_store_dwordx4 v[6:7], v[0:3]
 ; GCN-NEXT:    s_nop 0
-; GCN-NEXT:    v_mov_b32_e32 v0, s22
-; GCN-NEXT:    v_mov_b32_e32 v1, s9
-; GCN-NEXT:    v_mov_b32_e32 v2, s5
-; GCN-NEXT:    v_mov_b32_e32 v3, s4
+; GCN-NEXT:    v_mov_b32_e32 v0, s16
+; GCN-NEXT:    v_mov_b32_e32 v1, s19
+; GCN-NEXT:    v_mov_b32_e32 v2, s23
+; GCN-NEXT:    v_mov_b32_e32 v3, s24
 ; GCN-NEXT:    flat_store_dwordx4 v[4:5], v[0:3]
 ; GCN-NEXT:    s_endpgm
   %shift = ashr <2 x i128> %lhs, %rhs
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll b/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll
--- a/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/ashr.ll
@@ -396,106 +396,108 @@
 define signext i128 @ashr_i128(i128 signext %a, i128 signext %b) {
 ; MIPS-LABEL: ashr_i128:
 ; MIPS:       # %bb.0: # %entry
-; MIPS-NEXT:    lw $2, 28($sp)
+; MIPS-NEXT:    lw $10, 28($sp)
 ; MIPS-NEXT:    addiu $1, $zero, 64
-; MIPS-NEXT:    subu $9, $1, $2
-; MIPS-NEXT:    sllv $10, $5, $9
-; MIPS-NEXT:    andi $13, $9, 32
-; MIPS-NEXT:    andi $3, $2, 32
-; MIPS-NEXT:    addiu $11, $zero, 0
-; MIPS-NEXT:    bnez $13, $BB5_2
-; MIPS-NEXT:    addiu $12, $zero, 0
+; MIPS-NEXT:    subu $1, $1, $10
+; MIPS-NEXT:    andi $2, $1, 63
+; MIPS-NEXT:    not $3, $2
+; MIPS-NEXT:    srlv $9, $6, $2
+; MIPS-NEXT:    andi $8, $1, 32
+; MIPS-NEXT:    bnez $8, $BB5_3
+; MIPS-NEXT:    addiu $15, $zero, 0
 ; MIPS-NEXT:  # %bb.1: # %entry
-; MIPS-NEXT:    move $12, $10
-; MIPS-NEXT:  $BB5_2: # %entry
-; MIPS-NEXT:    not $8, $2
-; MIPS-NEXT:    bnez $3, $BB5_5
-; MIPS-NEXT:    srlv $14, $6, $2
-; MIPS-NEXT:  # %bb.3: # %entry
-; MIPS-NEXT:    sll $1, $6, 1
-; MIPS-NEXT:    srlv $11, $7, $2
-; MIPS-NEXT:    sllv $1, $1, $8
-; MIPS-NEXT:    or $15, $1, $11
-; MIPS-NEXT:    bnez $13, $BB5_7
-; MIPS-NEXT:    move $11, $14
-; MIPS-NEXT:  # %bb.4: # %entry
-; MIPS-NEXT:    b $BB5_6
+; MIPS-NEXT:    srlv $1, $7, $2
+; MIPS-NEXT:    sll $11, $6, 1
+; MIPS-NEXT:    sllv $11, $11, $3
+; MIPS-NEXT:    or $gp, $11, $1
+; MIPS-NEXT:    move $13, $9
+; MIPS-NEXT:    bnez $8, $BB5_5
+; MIPS-NEXT:    sllv $14, $5, $2
+; MIPS-NEXT:  # %bb.2: # %entry
+; MIPS-NEXT:    b $BB5_4
 ; MIPS-NEXT:    nop
-; MIPS-NEXT:  $BB5_5:
-; MIPS-NEXT:    bnez $13, $BB5_7
+; MIPS-NEXT:  $BB5_3:
+; MIPS-NEXT:    addiu $13, $zero, 0
+; MIPS-NEXT:    move $gp, $9
+; MIPS-NEXT:    bnez $8, $BB5_5
+; MIPS-NEXT:    sllv $14, $5, $2
+; MIPS-NEXT:  $BB5_4: # %entry
 ; MIPS-NEXT:    move $15, $14
-; MIPS-NEXT:  $BB5_6: # %entry
-; MIPS-NEXT:    sllv $1, $4, $9
-; MIPS-NEXT:    not $9, $9
-; MIPS-NEXT:    srl $10, $5, 1
-; MIPS-NEXT:    srlv $9, $10, $9
-; MIPS-NEXT:    or $10, $1, $9
-; MIPS-NEXT:  $BB5_7: # %entry
-; MIPS-NEXT:    addiu $24, $2, -64
-; MIPS-NEXT:    sll $13, $4, 1
-; MIPS-NEXT:    srav $14, $4, $24
-; MIPS-NEXT:    andi $1, $24, 32
-; MIPS-NEXT:    bnez $1, $BB5_10
+; MIPS-NEXT:  $BB5_5: # %entry
+; MIPS-NEXT:    addiu $1, $10, -64
+; MIPS-NEXT:    andi $24, $1, 63
+; MIPS-NEXT:    sll $11, $4, 1
+; MIPS-NEXT:    srav $12, $4, $24
+; MIPS-NEXT:    andi $1, $1, 32
+; MIPS-NEXT:    bnez $1, $BB5_8
 ; MIPS-NEXT:    sra $9, $4, 31
-; MIPS-NEXT:  # %bb.8: # %entry
+; MIPS-NEXT:  # %bb.6: # %entry
 ; MIPS-NEXT:    srlv $1, $5, $24
 ; MIPS-NEXT:    not $24, $24
-; MIPS-NEXT:    sllv $24, $13, $24
-; MIPS-NEXT:    or $25, $24, $1
-; MIPS-NEXT:    move $24, $14
-; MIPS-NEXT:    sltiu $14, $2, 64
-; MIPS-NEXT:    beqz $14, $BB5_12
+; MIPS-NEXT:    sllv $24, $11, $24
+; MIPS-NEXT:    move $25, $12
+; MIPS-NEXT:    sltiu $12, $10, 64
+; MIPS-NEXT:    beqz $12, $BB5_10
+; MIPS-NEXT:    or $24, $24, $1
+; MIPS-NEXT:  # %bb.7: # %entry
+; MIPS-NEXT:    b $BB5_9
 ; MIPS-NEXT:    nop
-; MIPS-NEXT:  # %bb.9: # %entry
-; MIPS-NEXT:    b $BB5_11
+; MIPS-NEXT:  $BB5_8:
+; MIPS-NEXT:    move $24, $12
+; MIPS-NEXT:    sltiu $12, $10, 64
+; MIPS-NEXT:    beqz $12, $BB5_10
+; MIPS-NEXT:    move $25, $9
+; MIPS-NEXT:  $BB5_9:
+; MIPS-NEXT:    or $24, $gp, $15
+; MIPS-NEXT:  $BB5_10: # %entry
+; MIPS-NEXT:    beqz $8, $BB5_17
 ; MIPS-NEXT:    nop
-; MIPS-NEXT:  $BB5_10:
-; MIPS-NEXT:    move $25, $14
-; MIPS-NEXT:    sltiu $14, $2, 64
-; MIPS-NEXT:    beqz $14, $BB5_12
-; MIPS-NEXT:    move $24, $9
-; MIPS-NEXT:  $BB5_11:
-; MIPS-NEXT:    or $25, $15, $12
+; MIPS-NEXT:  # %bb.11: # %entry
+; MIPS-NEXT:    bnez $12, $BB5_18
+; MIPS-NEXT:    sltiu $10, $10, 1
 ; MIPS-NEXT:  $BB5_12: # %entry
-; MIPS-NEXT:    sltiu $12, $2, 1
-; MIPS-NEXT:    beqz $12, $BB5_18
+; MIPS-NEXT:    beqz $10, $BB5_19
 ; MIPS-NEXT:    nop
-; MIPS-NEXT:  # %bb.13: # %entry
-; MIPS-NEXT:    bnez $14, $BB5_19
+; MIPS-NEXT:  $BB5_13: # %entry
+; MIPS-NEXT:    beqz $10, $BB5_20
 ; MIPS-NEXT:    nop
 ; MIPS-NEXT:  $BB5_14: # %entry
-; MIPS-NEXT:    beqz $12, $BB5_20
-; MIPS-NEXT:    nop
-; MIPS-NEXT:  $BB5_15: # %entry
-; MIPS-NEXT:    bnez $3, $BB5_21
+; MIPS-NEXT:    bnez $8, $BB5_21
 ; MIPS-NEXT:    srav $4, $4, $2
-; MIPS-NEXT:  $BB5_16: # %entry
+; MIPS-NEXT:  $BB5_15: # %entry
 ; MIPS-NEXT:    srlv $1, $5, $2
-; MIPS-NEXT:    sllv $2, $13, $8
+; MIPS-NEXT:    sllv $2, $11, $3
 ; MIPS-NEXT:    or $3, $2, $1
-; MIPS-NEXT:    bnez $14, $BB5_23
+; MIPS-NEXT:    bnez $12, $BB5_23
 ; MIPS-NEXT:    move $2, $4
-; MIPS-NEXT:  # %bb.17: # %entry
+; MIPS-NEXT:  # %bb.16: # %entry
 ; MIPS-NEXT:    b $BB5_22
 ; MIPS-NEXT:    nop
-; MIPS-NEXT:  $BB5_18: # %entry
-; MIPS-NEXT:    beqz $14, $BB5_14
-; MIPS-NEXT:    move $7, $25
-; MIPS-NEXT:  $BB5_19:
-; MIPS-NEXT:    bnez $12, $BB5_15
-; MIPS-NEXT:    or $24, $11, $10
+; MIPS-NEXT:  $BB5_17: # %entry
+; MIPS-NEXT:    sllv $1, $4, $2
+; MIPS-NEXT:    srl $14, $5, 1
+; MIPS-NEXT:    srlv $14, $14, $3
+; MIPS-NEXT:    or $14, $1, $14
+; MIPS-NEXT:    beqz $12, $BB5_12
+; MIPS-NEXT:    sltiu $10, $10, 1
+; MIPS-NEXT:  $BB5_18:
+; MIPS-NEXT:    bnez $10, $BB5_13
+; MIPS-NEXT:    or $25, $13, $14
+; MIPS-NEXT:  $BB5_19: # %entry
+; MIPS-NEXT:    bnez $10, $BB5_14
+; MIPS-NEXT:    move $6, $25
 ; MIPS-NEXT:  $BB5_20: # %entry
-; MIPS-NEXT:    move $6, $24
-; MIPS-NEXT:    beqz $3, $BB5_16
+; MIPS-NEXT:    move $7, $24
+; MIPS-NEXT:    beqz $8, $BB5_15
 ; MIPS-NEXT:    srav $4, $4, $2
 ; MIPS-NEXT:  $BB5_21:
 ; MIPS-NEXT:    move $2, $9
-; MIPS-NEXT:    bnez $14, $BB5_23
+; MIPS-NEXT:    bnez $12, $BB5_23
 ; MIPS-NEXT:    move $3, $4
 ; MIPS-NEXT:  $BB5_22: # %entry
 ; MIPS-NEXT:    move $2, $9
 ; MIPS-NEXT:  $BB5_23: # %entry
-; MIPS-NEXT:    bnez $14, $BB5_25
+; MIPS-NEXT:    bnez $12, $BB5_25
 ; MIPS-NEXT:    nop
 ; MIPS-NEXT:  # %bb.24: # %entry
 ; MIPS-NEXT:    move $3, $9
@@ -506,188 +508,204 @@
 ;
 ; MIPS32-LABEL: ashr_i128:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    lw $9, 28($sp)
-; MIPS32-NEXT:    srlv $1, $7, $9
-; MIPS32-NEXT:    not $2, $9
-; MIPS32-NEXT:    sll $3, $6, 1
-; MIPS32-NEXT:    sllv $3, $3, $2
-; MIPS32-NEXT:    addiu $8, $zero, 64
-; MIPS32-NEXT:    or $1, $3, $1
-; MIPS32-NEXT:    srlv $10, $6, $9
-; MIPS32-NEXT:    subu $3, $8, $9
-; MIPS32-NEXT:    sllv $11, $5, $3
-; MIPS32-NEXT:    andi $12, $3, 32
-; MIPS32-NEXT:    andi $13, $9, 32
-; MIPS32-NEXT:    move $8, $11
-; MIPS32-NEXT:    movn $8, $zero, $12
-; MIPS32-NEXT:    movn $1, $10, $13
-; MIPS32-NEXT:    addiu $14, $9, -64
-; MIPS32-NEXT:    srlv $15, $5, $14
+; MIPS32-NEXT:    lw $2, 28($sp)
+; MIPS32-NEXT:    addiu $1, $zero, 64
+; MIPS32-NEXT:    subu $1, $1, $2
+; MIPS32-NEXT:    andi $9, $1, 63
+; MIPS32-NEXT:    sllv $3, $5, $9
+; MIPS32-NEXT:    andi $10, $1, 32
+; MIPS32-NEXT:    move $1, $3
+; MIPS32-NEXT:    movn $1, $zero, $10
+; MIPS32-NEXT:    srlv $8, $7, $9
+; MIPS32-NEXT:    not $11, $9
+; MIPS32-NEXT:    sll $12, $6, 1
+; MIPS32-NEXT:    sllv $12, $12, $11
+; MIPS32-NEXT:    or $8, $12, $8
+; MIPS32-NEXT:    srlv $12, $6, $9
+; MIPS32-NEXT:    movn $8, $12, $10
+; MIPS32-NEXT:    addiu $13, $2, -64
+; MIPS32-NEXT:    andi $14, $13, 63
+; MIPS32-NEXT:    or $15, $8, $1
+; MIPS32-NEXT:    srlv $1, $5, $14
 ; MIPS32-NEXT:    sll $24, $4, 1
-; MIPS32-NEXT:    not $25, $14
-; MIPS32-NEXT:    sllv $25, $24, $25
-; MIPS32-NEXT:    or $gp, $1, $8
-; MIPS32-NEXT:    or $1, $25, $15
+; MIPS32-NEXT:    not $8, $14
+; MIPS32-NEXT:    sllv $8, $24, $8
+; MIPS32-NEXT:    or $1, $8, $1
 ; MIPS32-NEXT:    srav $8, $4, $14
-; MIPS32-NEXT:    andi $14, $14, 32
-; MIPS32-NEXT:    movn $1, $8, $14
-; MIPS32-NEXT:    sllv $15, $4, $3
-; MIPS32-NEXT:    not $3, $3
+; MIPS32-NEXT:    andi $13, $13, 32
+; MIPS32-NEXT:    movn $1, $8, $13
+; MIPS32-NEXT:    sltiu $14, $2, 64
+; MIPS32-NEXT:    movn $1, $15, $14
+; MIPS32-NEXT:    movn $12, $zero, $10
+; MIPS32-NEXT:    sllv $15, $4, $9
 ; MIPS32-NEXT:    srl $25, $5, 1
-; MIPS32-NEXT:    srlv $3, $25, $3
-; MIPS32-NEXT:    sltiu $25, $9, 64
-; MIPS32-NEXT:    movn $1, $gp, $25
-; MIPS32-NEXT:    or $15, $15, $3
-; MIPS32-NEXT:    srlv $3, $5, $9
-; MIPS32-NEXT:    sllv $2, $24, $2
-; MIPS32-NEXT:    or $5, $2, $3
-; MIPS32-NEXT:    srav $24, $4, $9
-; MIPS32-NEXT:    movn $5, $24, $13
-; MIPS32-NEXT:    sra $2, $4, 31
-; MIPS32-NEXT:    movz $1, $7, $9
-; MIPS32-NEXT:    move $3, $2
-; MIPS32-NEXT:    movn $3, $5, $25
-; MIPS32-NEXT:    movn $15, $11, $12
-; MIPS32-NEXT:    movn $10, $zero, $13
-; MIPS32-NEXT:    or $4, $10, $15
-; MIPS32-NEXT:    movn $8, $2, $14
-; MIPS32-NEXT:    movn $8, $4, $25
-; MIPS32-NEXT:    movz $8, $6, $9
-; MIPS32-NEXT:    movn $24, $2, $13
-; MIPS32-NEXT:    movn $2, $24, $25
+; MIPS32-NEXT:    srlv $25, $25, $11
+; MIPS32-NEXT:    or $15, $15, $25
+; MIPS32-NEXT:    movn $15, $3, $10
+; MIPS32-NEXT:    or $12, $12, $15
+; MIPS32-NEXT:    sra $3, $4, 31
+; MIPS32-NEXT:    movn $8, $3, $13
+; MIPS32-NEXT:    movn $8, $12, $14
+; MIPS32-NEXT:    srav $4, $4, $9
+; MIPS32-NEXT:    movz $8, $6, $2
+; MIPS32-NEXT:    movz $1, $7, $2
+; MIPS32-NEXT:    move $6, $4
+; MIPS32-NEXT:    movn $6, $3, $10
+; MIPS32-NEXT:    move $2, $3
+; MIPS32-NEXT:    movn $2, $6, $14
+; MIPS32-NEXT:    srlv $5, $5, $9
+; MIPS32-NEXT:    sllv $6, $24, $11
+; MIPS32-NEXT:    or $5, $6, $5
+; MIPS32-NEXT:    movn $5, $4, $10
+; MIPS32-NEXT:    movn $3, $5, $14
 ; MIPS32-NEXT:    move $4, $8
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    move $5, $1
 ;
 ; 32R2-LABEL: ashr_i128:
 ; 32R2:       # %bb.0: # %entry
-; 32R2-NEXT:    lw $9, 28($sp)
-; 32R2-NEXT:    srlv $1, $7, $9
-; 32R2-NEXT:    not $2, $9
-; 32R2-NEXT:    sll $3, $6, 1
-; 32R2-NEXT:    sllv $3, $3, $2
-; 32R2-NEXT:    addiu $8, $zero, 64
-; 32R2-NEXT:    or $1, $3, $1
-; 32R2-NEXT:    srlv $10, $6, $9
-; 32R2-NEXT:    subu $3, $8, $9
-; 32R2-NEXT:    sllv $11, $5, $3
-; 32R2-NEXT:    andi $12, $3, 32
-; 32R2-NEXT:    andi $13, $9, 32
-; 32R2-NEXT:    move $8, $11
-; 32R2-NEXT:    movn $8, $zero, $12
-; 32R2-NEXT:    movn $1, $10, $13
-; 32R2-NEXT:    addiu $14, $9, -64
-; 32R2-NEXT:    srlv $15, $5, $14
+; 32R2-NEXT:    lw $2, 28($sp)
+; 32R2-NEXT:    addiu $1, $zero, 64
+; 32R2-NEXT:    subu $1, $1, $2
+; 32R2-NEXT:    andi $9, $1, 63
+; 32R2-NEXT:    sllv $3, $5, $9
+; 32R2-NEXT:    andi $10, $1, 32
+; 32R2-NEXT:    move $1, $3
+; 32R2-NEXT:    movn $1, $zero, $10
+; 32R2-NEXT:    srlv $8, $7, $9
+; 32R2-NEXT:    not $11, $9
+; 32R2-NEXT:    sll $12, $6, 1
+; 32R2-NEXT:    sllv $12, $12, $11
+; 32R2-NEXT:    or $8, $12, $8
+; 32R2-NEXT:    srlv $12, $6, $9
+; 32R2-NEXT:    movn $8, $12, $10
+; 32R2-NEXT:    addiu $13, $2, -64
+; 32R2-NEXT:    andi $14, $13, 63
+; 32R2-NEXT:    or $15, $8, $1
+; 32R2-NEXT:    srlv $1, $5, $14
 ; 32R2-NEXT:    sll $24, $4, 1
-; 32R2-NEXT:    not $25, $14
-; 32R2-NEXT:    sllv $25, $24, $25
-; 32R2-NEXT:    or $gp, $1, $8
-; 32R2-NEXT:    or $1, $25, $15
+; 32R2-NEXT:    not $8, $14
+; 32R2-NEXT:    sllv $8, $24, $8
+; 32R2-NEXT:    or $1, $8, $1
 ; 32R2-NEXT:    srav $8, $4, $14
-; 32R2-NEXT:    andi $14, $14, 32
-; 32R2-NEXT:    movn $1, $8, $14
-; 32R2-NEXT:    sllv $15, $4, $3
-; 32R2-NEXT:    not $3, $3
+; 32R2-NEXT:    andi $13, $13, 32
+; 32R2-NEXT:    movn $1, $8, $13
+; 32R2-NEXT:    sltiu $14, $2, 64
+; 32R2-NEXT:    movn $1, $15, $14
+; 32R2-NEXT:    movn $12, $zero, $10
+; 32R2-NEXT:    sllv $15, $4, $9
 ; 32R2-NEXT:    srl $25, $5, 1
-; 32R2-NEXT:    srlv $3, $25, $3
-; 32R2-NEXT:    sltiu $25, $9, 64
-; 32R2-NEXT:    movn $1, $gp, $25
-; 32R2-NEXT:    or $15, $15, $3
-; 32R2-NEXT:    srlv $3, $5, $9
-; 32R2-NEXT:    sllv $2, $24, $2
-; 32R2-NEXT:    or $5, $2, $3
-; 32R2-NEXT:    srav $24, $4, $9
-; 32R2-NEXT:    movn $5, $24, $13
-; 32R2-NEXT:    sra $2, $4, 31
-; 32R2-NEXT:    movz $1, $7, $9
-; 32R2-NEXT:    move $3, $2
-; 32R2-NEXT:    movn $3, $5, $25
-; 32R2-NEXT:    movn $15, $11, $12
-; 32R2-NEXT:    movn $10, $zero, $13
-; 32R2-NEXT:    or $4, $10, $15
-; 32R2-NEXT:    movn $8, $2, $14
-; 32R2-NEXT:    movn $8, $4, $25
-; 32R2-NEXT:    movz $8, $6, $9
-; 32R2-NEXT:    movn $24, $2, $13
-; 32R2-NEXT:    movn $2, $24, $25
+; 32R2-NEXT:    srlv $25, $25, $11
+; 32R2-NEXT:    or $15, $15, $25
+; 32R2-NEXT:    movn $15, $3, $10
+; 32R2-NEXT:    or $12, $12, $15
+; 32R2-NEXT:    sra $3, $4, 31
+; 32R2-NEXT:    movn $8, $3, $13
+; 32R2-NEXT:    movn $8, $12, $14
+; 32R2-NEXT:    srav $4, $4, $9
+; 32R2-NEXT:    movz $8, $6, $2
+; 32R2-NEXT:    movz $1, $7, $2
+; 32R2-NEXT:    move $6, $4
+; 32R2-NEXT:    movn $6, $3, $10
+; 32R2-NEXT:    move $2, $3
+; 32R2-NEXT:    movn $2, $6, $14
+; 32R2-NEXT:    srlv $5, $5, $9
+; 32R2-NEXT:    sllv $6, $24, $11
+; 32R2-NEXT:    or $5, $6, $5
+; 32R2-NEXT:    movn $5, $4, $10
+; 32R2-NEXT:    movn $3, $5, $14
 ; 32R2-NEXT:    move $4, $8
 ; 32R2-NEXT:    jr $ra
 ; 32R2-NEXT:    move $5, $1
 ;
 ; 32R6-LABEL: ashr_i128:
 ; 32R6:       # %bb.0: # %entry
-; 32R6-NEXT:    lw $3, 28($sp)
-; 32R6-NEXT:    addiu $1, $zero, 64
-; 32R6-NEXT:    subu $1, $1, $3
-; 32R6-NEXT:    sllv $2, $5, $1
-; 32R6-NEXT:    andi $8, $1, 32
-; 32R6-NEXT:    selnez $9, $2, $8
-; 32R6-NEXT:    sllv $10, $4, $1
-; 32R6-NEXT:    not $1, $1
-; 32R6-NEXT:    srl $11, $5, 1
-; 32R6-NEXT:    srlv $1, $11, $1
-; 32R6-NEXT:    or $1, $10, $1
-; 32R6-NEXT:    seleqz $1, $1, $8
-; 32R6-NEXT:    or $1, $9, $1
-; 32R6-NEXT:    srlv $9, $7, $3
-; 32R6-NEXT:    not $10, $3
-; 32R6-NEXT:    sll $11, $6, 1
-; 32R6-NEXT:    sllv $11, $11, $10
-; 32R6-NEXT:    or $9, $11, $9
-; 32R6-NEXT:    andi $11, $3, 32
-; 32R6-NEXT:    seleqz $9, $9, $11
-; 32R6-NEXT:    srlv $12, $6, $3
-; 32R6-NEXT:    selnez $13, $12, $11
-; 32R6-NEXT:    seleqz $12, $12, $11
-; 32R6-NEXT:    or $1, $12, $1
-; 32R6-NEXT:    seleqz $2, $2, $8
-; 32R6-NEXT:    or $8, $13, $9
-; 32R6-NEXT:    addiu $9, $3, -64
-; 32R6-NEXT:    srlv $12, $5, $9
-; 32R6-NEXT:    sll $13, $4, 1
-; 32R6-NEXT:    not $14, $9
-; 32R6-NEXT:    sllv $14, $13, $14
-; 32R6-NEXT:    sltiu $15, $3, 64
-; 32R6-NEXT:    or $2, $8, $2
-; 32R6-NEXT:    selnez $1, $1, $15
-; 32R6-NEXT:    or $8, $14, $12
-; 32R6-NEXT:    srav $12, $4, $9
-; 32R6-NEXT:    andi $9, $9, 32
-; 32R6-NEXT:    seleqz $14, $12, $9
-; 32R6-NEXT:    sra $24, $4, 31
-; 32R6-NEXT:    selnez $25, $24, $9
-; 32R6-NEXT:    seleqz $8, $8, $9
-; 32R6-NEXT:    or $14, $25, $14
-; 32R6-NEXT:    seleqz $14, $14, $15
-; 32R6-NEXT:    selnez $9, $12, $9
-; 32R6-NEXT:    seleqz $12, $24, $15
-; 32R6-NEXT:    or $1, $1, $14
-; 32R6-NEXT:    selnez $14, $1, $3
-; 32R6-NEXT:    selnez $1, $2, $15
-; 32R6-NEXT:    or $2, $9, $8
-; 32R6-NEXT:    srav $8, $4, $3
-; 32R6-NEXT:    seleqz $4, $8, $11
-; 32R6-NEXT:    selnez $9, $24, $11
+; 32R6-NEXT:    addiu $sp, $sp, -16
+; 32R6-NEXT:    .cfi_def_cfa_offset 16
+; 32R6-NEXT:    sw $19, 12($sp) # 4-byte Folded Spill
+; 32R6-NEXT:    sw $18, 8($sp) # 4-byte Folded Spill
+; 32R6-NEXT:    sw $17, 4($sp) # 4-byte Folded Spill
+; 32R6-NEXT:    sw $16, 0($sp) # 4-byte Folded Spill
+; 32R6-NEXT:    .cfi_offset 19, -4
+; 32R6-NEXT:    .cfi_offset 18, -8
+; 32R6-NEXT:    .cfi_offset 17, -12
+; 32R6-NEXT:    .cfi_offset 16, -16
+; 32R6-NEXT:    lw $1, 44($sp)
+; 32R6-NEXT:    addiu $2, $zero, 64
+; 32R6-NEXT:    subu $2, $2, $1
+; 32R6-NEXT:    andi $3, $2, 63
+; 32R6-NEXT:    sllv $8, $4, $3
+; 32R6-NEXT:    not $9, $3
+; 32R6-NEXT:    srl $10, $5, 1
+; 32R6-NEXT:    srlv $10, $10, $9
+; 32R6-NEXT:    or $8, $8, $10
+; 32R6-NEXT:    srav $10, $4, $3
+; 32R6-NEXT:    sllv $11, $5, $3
+; 32R6-NEXT:    andi $2, $2, 32
+; 32R6-NEXT:    selnez $12, $11, $2
+; 32R6-NEXT:    seleqz $8, $8, $2
+; 32R6-NEXT:    selnez $13, $10, $2
+; 32R6-NEXT:    srlv $14, $5, $3
+; 32R6-NEXT:    sll $15, $4, 1
+; 32R6-NEXT:    sllv $24, $15, $9
+; 32R6-NEXT:    or $14, $24, $14
+; 32R6-NEXT:    seleqz $14, $14, $2
+; 32R6-NEXT:    or $13, $13, $14
+; 32R6-NEXT:    srlv $14, $6, $3
+; 32R6-NEXT:    addiu $24, $1, -64
+; 32R6-NEXT:    sltiu $25, $1, 64
+; 32R6-NEXT:    sra $gp, $4, 31
+; 32R6-NEXT:    or $8, $12, $8
+; 32R6-NEXT:    seleqz $12, $14, $2
+; 32R6-NEXT:    seleqz $10, $10, $2
+; 32R6-NEXT:    selnez $16, $gp, $2
+; 32R6-NEXT:    seleqz $17, $gp, $25
+; 32R6-NEXT:    andi $18, $24, 32
+; 32R6-NEXT:    seleqz $19, $6, $1
+; 32R6-NEXT:    selnez $13, $13, $25
+; 32R6-NEXT:    selnez $14, $14, $2
+; 32R6-NEXT:    srlv $3, $7, $3
+; 32R6-NEXT:    sll $6, $6, 1
+; 32R6-NEXT:    sllv $6, $6, $9
+; 32R6-NEXT:    or $3, $6, $3
+; 32R6-NEXT:    seleqz $3, $3, $2
+; 32R6-NEXT:    or $3, $14, $3
+; 32R6-NEXT:    seleqz $2, $11, $2
+; 32R6-NEXT:    or $2, $3, $2
+; 32R6-NEXT:    or $3, $13, $17
+; 32R6-NEXT:    or $6, $16, $10
+; 32R6-NEXT:    seleqz $7, $7, $1
+; 32R6-NEXT:    or $8, $12, $8
+; 32R6-NEXT:    selnez $8, $8, $25
+; 32R6-NEXT:    selnez $9, $gp, $18
+; 32R6-NEXT:    andi $10, $24, 63
+; 32R6-NEXT:    srav $11, $4, $10
+; 32R6-NEXT:    seleqz $4, $11, $18
 ; 32R6-NEXT:    or $4, $9, $4
-; 32R6-NEXT:    selnez $9, $4, $15
-; 32R6-NEXT:    seleqz $2, $2, $15
-; 32R6-NEXT:    seleqz $4, $6, $3
-; 32R6-NEXT:    seleqz $6, $7, $3
-; 32R6-NEXT:    or $1, $1, $2
-; 32R6-NEXT:    selnez $1, $1, $3
-; 32R6-NEXT:    or $1, $6, $1
-; 32R6-NEXT:    or $4, $4, $14
-; 32R6-NEXT:    or $2, $9, $12
-; 32R6-NEXT:    srlv $3, $5, $3
-; 32R6-NEXT:    sllv $5, $13, $10
-; 32R6-NEXT:    or $3, $5, $3
-; 32R6-NEXT:    seleqz $3, $3, $11
-; 32R6-NEXT:    selnez $5, $8, $11
-; 32R6-NEXT:    or $3, $5, $3
-; 32R6-NEXT:    selnez $3, $3, $15
-; 32R6-NEXT:    or $3, $3, $12
+; 32R6-NEXT:    seleqz $4, $4, $25
+; 32R6-NEXT:    or $4, $8, $4
+; 32R6-NEXT:    selnez $4, $4, $1
+; 32R6-NEXT:    or $4, $19, $4
+; 32R6-NEXT:    selnez $2, $2, $25
+; 32R6-NEXT:    srlv $5, $5, $10
+; 32R6-NEXT:    not $8, $10
+; 32R6-NEXT:    sllv $8, $15, $8
+; 32R6-NEXT:    or $5, $8, $5
+; 32R6-NEXT:    seleqz $5, $5, $18
+; 32R6-NEXT:    selnez $8, $11, $18
+; 32R6-NEXT:    or $5, $8, $5
+; 32R6-NEXT:    seleqz $5, $5, $25
+; 32R6-NEXT:    or $2, $2, $5
+; 32R6-NEXT:    selnez $1, $2, $1
+; 32R6-NEXT:    or $5, $7, $1
+; 32R6-NEXT:    selnez $1, $6, $25
+; 32R6-NEXT:    or $2, $1, $17
+; 32R6-NEXT:    lw $16, 0($sp) # 4-byte Folded Reload
+; 32R6-NEXT:    lw $17, 4($sp) # 4-byte Folded Reload
+; 32R6-NEXT:    lw $18, 8($sp) # 4-byte Folded Reload
+; 32R6-NEXT:    lw $19, 12($sp) # 4-byte Folded Reload
 ; 32R6-NEXT:    jr $ra
-; 32R6-NEXT:    move $5, $1
+; 32R6-NEXT:    addiu $sp, $sp, 16
 ;
 ; MIPS3-LABEL: ashr_i128:
 ; MIPS3:       # %bb.0: # %entry
@@ -766,169 +784,182 @@
 ; MMR3-NEXT:    .cfi_offset 17, -4
 ; MMR3-NEXT:    .cfi_offset 16, -8
 ; MMR3-NEXT:    move $8, $7
-; MMR3-NEXT:    sw $6, 32($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    sw $5, 36($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    sw $4, 8($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    lw $16, 76($sp)
-; MMR3-NEXT:    srlv $4, $7, $16
-; MMR3-NEXT:    not16 $3, $16
-; MMR3-NEXT:    sw $3, 24($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    sll16 $2, $6, 1
-; MMR3-NEXT:    sllv $3, $2, $3
+; MMR3-NEXT:    sw $5, 32($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    sw $4, 20($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    lw $17, 76($sp)
 ; MMR3-NEXT:    li16 $2, 64
-; MMR3-NEXT:    or16 $3, $4
-; MMR3-NEXT:    srlv $6, $6, $16
-; MMR3-NEXT:    sw $6, 12($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    subu16 $7, $2, $16
+; MMR3-NEXT:    subu16 $2, $2, $17
+; MMR3-NEXT:    andi16 $7, $2, 63
 ; MMR3-NEXT:    sllv $9, $5, $7
-; MMR3-NEXT:    andi16 $2, $7, 32
-; MMR3-NEXT:    sw $2, 28($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    andi16 $5, $16, 32
-; MMR3-NEXT:    sw $5, 16($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    move $4, $9
-; MMR3-NEXT:    li16 $17, 0
-; MMR3-NEXT:    movn $4, $17, $2
-; MMR3-NEXT:    movn $3, $6, $5
-; MMR3-NEXT:    addiu $2, $16, -64
-; MMR3-NEXT:    lw $5, 36($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    srlv $5, $5, $2
-; MMR3-NEXT:    sw $5, 20($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    andi16 $4, $2, 32
+; MMR3-NEXT:    sw $4, 8($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    li16 $2, 0
+; MMR3-NEXT:    move $16, $9
+; MMR3-NEXT:    movn $16, $2, $4
+; MMR3-NEXT:    srlv $3, $8, $7
+; MMR3-NEXT:    not16 $5, $7
+; MMR3-NEXT:    sw $5, 36($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    sw $6, 28($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    sll16 $2, $6, 1
+; MMR3-NEXT:    sllv $2, $2, $5
+; MMR3-NEXT:    or16 $2, $3
+; MMR3-NEXT:    srlv $3, $6, $7
+; MMR3-NEXT:    sw $3, 12($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    movn $2, $3, $4
+; MMR3-NEXT:    sw $17, 16($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    addiu $3, $17, -64
+; MMR3-NEXT:    sw $3, 4($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    andi16 $3, $3, 63
+; MMR3-NEXT:    or16 $2, $16
+; MMR3-NEXT:    lw $5, 32($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    srlv $4, $5, $3
+; MMR3-NEXT:    sw $4, 0($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    lw $6, 20($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    sll16 $4, $6, 1
+; MMR3-NEXT:    sw $4, 24($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    not16 $16, $3
+; MMR3-NEXT:    sllv $16, $4, $16
+; MMR3-NEXT:    lw $4, 0($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    or16 $16, $4
+; MMR3-NEXT:    srav $1, $6, $3
+; MMR3-NEXT:    lw $3, 4($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    andi16 $3, $3, 32
+; MMR3-NEXT:    sw $3, 4($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    movn $16, $1, $3
+; MMR3-NEXT:    sltiu $10, $17, 64
+; MMR3-NEXT:    movn $16, $2, $10
 ; MMR3-NEXT:    lw $17, 8($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    sll16 $6, $17, 1
-; MMR3-NEXT:    sw $6, 4($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    not16 $5, $2
-; MMR3-NEXT:    sllv $5, $6, $5
-; MMR3-NEXT:    or16 $3, $4
-; MMR3-NEXT:    lw $4, 20($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    or16 $5, $4
-; MMR3-NEXT:    srav $1, $17, $2
-; MMR3-NEXT:    andi16 $2, $2, 32
-; MMR3-NEXT:    sw $2, 20($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    movn $5, $1, $2
-; MMR3-NEXT:    sllv $2, $17, $7
-; MMR3-NEXT:    not16 $4, $7
-; MMR3-NEXT:    lw $7, 36($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    srl16 $6, $7, 1
-; MMR3-NEXT:    srlv $6, $6, $4
-; MMR3-NEXT:    sltiu $10, $16, 64
-; MMR3-NEXT:    movn $5, $3, $10
-; MMR3-NEXT:    or16 $6, $2
-; MMR3-NEXT:    srlv $2, $7, $16
-; MMR3-NEXT:    lw $3, 24($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    lw $4, 12($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    li16 $2, 0
+; MMR3-NEXT:    movn $4, $2, $17
+; MMR3-NEXT:    sllv $2, $6, $7
+; MMR3-NEXT:    sw $2, 12($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    srl16 $3, $5, 1
+; MMR3-NEXT:    lw $2, 36($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    srlv $2, $3, $2
+; MMR3-NEXT:    lw $3, 12($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    or16 $2, $3
+; MMR3-NEXT:    movn $2, $9, $17
+; MMR3-NEXT:    or16 $2, $4
+; MMR3-NEXT:    sra $3, $6, 31
 ; MMR3-NEXT:    lw $4, 4($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    sllv $3, $4, $3
-; MMR3-NEXT:    or16 $3, $2
-; MMR3-NEXT:    srav $11, $17, $16
-; MMR3-NEXT:    lw $4, 16($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    movn $3, $11, $4
-; MMR3-NEXT:    sra $2, $17, 31
-; MMR3-NEXT:    movz $5, $8, $16
-; MMR3-NEXT:    move $8, $2
-; MMR3-NEXT:    movn $8, $3, $10
-; MMR3-NEXT:    lw $3, 28($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    movn $6, $9, $3
-; MMR3-NEXT:    li16 $3, 0
-; MMR3-NEXT:    lw $7, 12($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    movn $7, $3, $4
+; MMR3-NEXT:    movn $1, $3, $4
+; MMR3-NEXT:    movn $1, $2, $10
+; MMR3-NEXT:    srav $4, $6, $7
+; MMR3-NEXT:    lw $2, 16($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    lw $5, 28($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    movz $1, $5, $2
+; MMR3-NEXT:    movz $16, $8, $2
+; MMR3-NEXT:    move $6, $4
+; MMR3-NEXT:    movn $6, $3, $17
+; MMR3-NEXT:    move $2, $3
+; MMR3-NEXT:    movn $2, $6, $10
+; MMR3-NEXT:    lw $5, 32($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    srlv $6, $5, $7
+; MMR3-NEXT:    lw $5, 36($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    lw $7, 24($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    sllv $7, $7, $5
 ; MMR3-NEXT:    or16 $7, $6
-; MMR3-NEXT:    lw $3, 20($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    movn $1, $2, $3
-; MMR3-NEXT:    movn $1, $7, $10
-; MMR3-NEXT:    lw $3, 32($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    movz $1, $3, $16
-; MMR3-NEXT:    movn $11, $2, $4
-; MMR3-NEXT:    movn $2, $11, $10
-; MMR3-NEXT:    move $3, $8
+; MMR3-NEXT:    movn $7, $4, $17
+; MMR3-NEXT:    movn $3, $7, $10
 ; MMR3-NEXT:    move $4, $1
+; MMR3-NEXT:    move $5, $16
 ; MMR3-NEXT:    lwp $16, 40($sp)
 ; MMR3-NEXT:    addiusp 48
 ; MMR3-NEXT:    jrc $ra
 ;
 ; MMR6-LABEL: ashr_i128:
 ; MMR6:       # %bb.0: # %entry
-; MMR6-NEXT:    addiu $sp, $sp, -16
-; MMR6-NEXT:    .cfi_def_cfa_offset 16
-; MMR6-NEXT:    sw $17, 12($sp) # 4-byte Folded Spill
-; MMR6-NEXT:    sw $16, 8($sp) # 4-byte Folded Spill
+; MMR6-NEXT:    addiu $sp, $sp, -24
+; MMR6-NEXT:    .cfi_def_cfa_offset 24
+; MMR6-NEXT:    sw $17, 20($sp) # 4-byte Folded Spill
+; MMR6-NEXT:    sw $16, 16($sp) # 4-byte Folded Spill
 ; MMR6-NEXT:    .cfi_offset 17, -4
 ; MMR6-NEXT:    .cfi_offset 16, -8
 ; MMR6-NEXT:    move $1, $7
-; MMR6-NEXT:    lw $3, 44($sp)
+; MMR6-NEXT:    move $17, $5
+; MMR6-NEXT:    move $5, $4
+; MMR6-NEXT:    lw $7, 52($sp)
 ; MMR6-NEXT:    li16 $2, 64
-; MMR6-NEXT:    subu16 $7, $2, $3
-; MMR6-NEXT:    sllv $8, $5, $7
-; MMR6-NEXT:    andi16 $2, $7, 32
-; MMR6-NEXT:    selnez $9, $8, $2
-; MMR6-NEXT:    sllv $10, $4, $7
-; MMR6-NEXT:    not16 $7, $7
-; MMR6-NEXT:    srl16 $16, $5, 1
-; MMR6-NEXT:    srlv $7, $16, $7
-; MMR6-NEXT:    or $7, $10, $7
-; MMR6-NEXT:    seleqz $7, $7, $2
-; MMR6-NEXT:    or $7, $9, $7
-; MMR6-NEXT:    srlv $9, $1, $3
+; MMR6-NEXT:    subu16 $2, $2, $7
+; MMR6-NEXT:    andi16 $3, $2, 63
+; MMR6-NEXT:    sllv $8, $4, $3
 ; MMR6-NEXT:    not16 $16, $3
-; MMR6-NEXT:    sw $16, 4($sp) # 4-byte Folded Spill
-; MMR6-NEXT:    sll16 $17, $6, 1
-; MMR6-NEXT:    sllv $10, $17, $16
-; MMR6-NEXT:    or $9, $10, $9
-; MMR6-NEXT:    andi16 $17, $3, 32
-; MMR6-NEXT:    seleqz $9, $9, $17
-; MMR6-NEXT:    srlv $10, $6, $3
-; MMR6-NEXT:    selnez $11, $10, $17
-; MMR6-NEXT:    seleqz $10, $10, $17
-; MMR6-NEXT:    or $10, $10, $7
-; MMR6-NEXT:    seleqz $12, $8, $2
-; MMR6-NEXT:    or $8, $11, $9
-; MMR6-NEXT:    addiu $2, $3, -64
-; MMR6-NEXT:    srlv $9, $5, $2
-; MMR6-NEXT:    sll16 $7, $4, 1
-; MMR6-NEXT:    not16 $16, $2
-; MMR6-NEXT:    sllv $11, $7, $16
-; MMR6-NEXT:    sltiu $13, $3, 64
-; MMR6-NEXT:    or $8, $8, $12
-; MMR6-NEXT:    selnez $10, $10, $13
-; MMR6-NEXT:    or $9, $11, $9
-; MMR6-NEXT:    srav $11, $4, $2
+; MMR6-NEXT:    move $4, $17
+; MMR6-NEXT:    sw $17, 8($sp) # 4-byte Folded Spill
+; MMR6-NEXT:    srl16 $17, $17, 1
+; MMR6-NEXT:    srlv $9, $17, $16
+; MMR6-NEXT:    or $8, $8, $9
+; MMR6-NEXT:    srav $9, $5, $3
+; MMR6-NEXT:    sllv $10, $4, $3
 ; MMR6-NEXT:    andi16 $2, $2, 32
-; MMR6-NEXT:    seleqz $12, $11, $2
-; MMR6-NEXT:    sra $14, $4, 31
-; MMR6-NEXT:    selnez $15, $14, $2
+; MMR6-NEXT:    selnez $11, $10, $2
+; MMR6-NEXT:    seleqz $8, $8, $2
+; MMR6-NEXT:    selnez $12, $9, $2
+; MMR6-NEXT:    srlv $13, $4, $3
+; MMR6-NEXT:    sll16 $17, $5, 1
+; MMR6-NEXT:    sw $17, 12($sp) # 4-byte Folded Spill
+; MMR6-NEXT:    sllv $14, $17, $16
+; MMR6-NEXT:    or $13, $14, $13
+; MMR6-NEXT:    seleqz $13, $13, $2
+; MMR6-NEXT:    or $12, $12, $13
+; MMR6-NEXT:    srlv $13, $6, $3
+; MMR6-NEXT:    addiu $4, $7, -64
+; MMR6-NEXT:    sw $4, 4($sp) # 4-byte Folded Spill
+; MMR6-NEXT:    sltiu $14, $7, 64
+; MMR6-NEXT:    sra $15, $5, 31
+; MMR6-NEXT:    or $8, $11, $8
+; MMR6-NEXT:    seleqz $11, $13, $2
 ; MMR6-NEXT:    seleqz $9, $9, $2
-; MMR6-NEXT:    or $12, $15, $12
-; MMR6-NEXT:    seleqz $12, $12, $13
-; MMR6-NEXT:    selnez $2, $11, $2
-; MMR6-NEXT:    seleqz $11, $14, $13
-; MMR6-NEXT:    or $10, $10, $12
-; MMR6-NEXT:    selnez $10, $10, $3
-; MMR6-NEXT:    selnez $8, $8, $13
-; MMR6-NEXT:    or $2, $2, $9
-; MMR6-NEXT:    srav $9, $4, $3
-; MMR6-NEXT:    seleqz $4, $9, $17
-; MMR6-NEXT:    selnez $12, $14, $17
-; MMR6-NEXT:    or $4, $12, $4
-; MMR6-NEXT:    selnez $12, $4, $13
-; MMR6-NEXT:    seleqz $2, $2, $13
-; MMR6-NEXT:    seleqz $4, $6, $3
-; MMR6-NEXT:    seleqz $1, $1, $3
-; MMR6-NEXT:    or $2, $8, $2
-; MMR6-NEXT:    selnez $2, $2, $3
-; MMR6-NEXT:    or $1, $1, $2
-; MMR6-NEXT:    or $4, $4, $10
-; MMR6-NEXT:    or $2, $12, $11
-; MMR6-NEXT:    srlv $3, $5, $3
-; MMR6-NEXT:    lw $5, 4($sp) # 4-byte Folded Reload
-; MMR6-NEXT:    sllv $5, $7, $5
-; MMR6-NEXT:    or $3, $5, $3
-; MMR6-NEXT:    seleqz $3, $3, $17
-; MMR6-NEXT:    selnez $5, $9, $17
-; MMR6-NEXT:    or $3, $5, $3
-; MMR6-NEXT:    selnez $3, $3, $13
-; MMR6-NEXT:    or $3, $3, $11
-; MMR6-NEXT:    move $5, $1
+; MMR6-NEXT:    selnez $24, $15, $2
+; MMR6-NEXT:    seleqz $25, $15, $14
+; MMR6-NEXT:    andi16 $17, $4, 32
+; MMR6-NEXT:    seleqz $gp, $6, $7
+; MMR6-NEXT:    selnez $12, $12, $14
+; MMR6-NEXT:    selnez $13, $13, $2
+; MMR6-NEXT:    srlv $3, $1, $3
+; MMR6-NEXT:    sll16 $6, $6, 1
+; MMR6-NEXT:    sllv $6, $6, $16
+; MMR6-NEXT:    or $3, $6, $3
+; MMR6-NEXT:    seleqz $3, $3, $2
+; MMR6-NEXT:    or $3, $13, $3
+; MMR6-NEXT:    seleqz $2, $10, $2
+; MMR6-NEXT:    or $2, $3, $2
+; MMR6-NEXT:    or $3, $12, $25
+; MMR6-NEXT:    or $9, $24, $9
+; MMR6-NEXT:    seleqz $4, $1, $7
+; MMR6-NEXT:    or $1, $11, $8
+; MMR6-NEXT:    selnez $1, $1, $14
+; MMR6-NEXT:    selnez $8, $15, $17
+; MMR6-NEXT:    lw $6, 4($sp) # 4-byte Folded Reload
+; MMR6-NEXT:    andi16 $6, $6, 63
+; MMR6-NEXT:    srav $5, $5, $6
+; MMR6-NEXT:    seleqz $10, $5, $17
+; MMR6-NEXT:    or $8, $8, $10
+; MMR6-NEXT:    seleqz $8, $8, $14
+; MMR6-NEXT:    or $1, $1, $8
+; MMR6-NEXT:    selnez $1, $1, $7
+; MMR6-NEXT:    or $1, $gp, $1
+; MMR6-NEXT:    selnez $2, $2, $14
 ; MMR6-NEXT:    lw $16, 8($sp) # 4-byte Folded Reload
-; MMR6-NEXT:    lw $17, 12($sp) # 4-byte Folded Reload
-; MMR6-NEXT:    addiu $sp, $sp, 16
+; MMR6-NEXT:    srlv $8, $16, $6
+; MMR6-NEXT:    not16 $6, $6
+; MMR6-NEXT:    lw $16, 12($sp) # 4-byte Folded Reload
+; MMR6-NEXT:    sllv $6, $16, $6
+; MMR6-NEXT:    or $6, $6, $8
+; MMR6-NEXT:    seleqz $6, $6, $17
+; MMR6-NEXT:    selnez $5, $5, $17
+; MMR6-NEXT:    or $5, $5, $6
+; MMR6-NEXT:    seleqz $5, $5, $14
+; MMR6-NEXT:    or $2, $2, $5
+; MMR6-NEXT:    selnez $2, $2, $7
+; MMR6-NEXT:    or $5, $4, $2
+; MMR6-NEXT:    selnez $2, $9, $14
+; MMR6-NEXT:    or $2, $2, $25
+; MMR6-NEXT:    move $4, $1
+; MMR6-NEXT:    lw $16, 16($sp) # 4-byte Folded Reload
+; MMR6-NEXT:    lw $17, 20($sp) # 4-byte Folded Reload
+; MMR6-NEXT:    addiu $sp, $sp, 24
 ; MMR6-NEXT:    jrc $ra
 entry:
   %r = ashr i128 %a, %b
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll b/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll
--- a/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/lshr.ll
@@ -396,111 +396,111 @@
 define signext i128 @lshr_i128(i128 signext %a, i128 signext %b) {
 ; MIPS2-LABEL: lshr_i128:
 ; MIPS2:       # %bb.0: # %entry
-; MIPS2-NEXT:    lw $2, 28($sp)
+; MIPS2-NEXT:    lw $8, 28($sp)
 ; MIPS2-NEXT:    addiu $1, $zero, 64
-; MIPS2-NEXT:    subu $12, $1, $2
-; MIPS2-NEXT:    sllv $10, $5, $12
-; MIPS2-NEXT:    andi $15, $12, 32
-; MIPS2-NEXT:    andi $8, $2, 32
+; MIPS2-NEXT:    subu $1, $1, $8
+; MIPS2-NEXT:    sll $2, $4, 1
+; MIPS2-NEXT:    addiu $3, $8, -64
+; MIPS2-NEXT:    andi $12, $3, 63
+; MIPS2-NEXT:    andi $9, $1, 63
+; MIPS2-NEXT:    andi $10, $1, 32
+; MIPS2-NEXT:    srlv $11, $4, $12
+; MIPS2-NEXT:    andi $1, $3, 32
+; MIPS2-NEXT:    bnez $1, $BB5_2
 ; MIPS2-NEXT:    addiu $3, $zero, 0
-; MIPS2-NEXT:    bnez $15, $BB5_2
-; MIPS2-NEXT:    addiu $13, $zero, 0
 ; MIPS2-NEXT:  # %bb.1: # %entry
-; MIPS2-NEXT:    move $13, $10
-; MIPS2-NEXT:  $BB5_2: # %entry
-; MIPS2-NEXT:    not $9, $2
-; MIPS2-NEXT:    bnez $8, $BB5_5
-; MIPS2-NEXT:    srlv $24, $6, $2
-; MIPS2-NEXT:  # %bb.3: # %entry
-; MIPS2-NEXT:    sll $1, $6, 1
-; MIPS2-NEXT:    srlv $11, $7, $2
-; MIPS2-NEXT:    sllv $1, $1, $9
-; MIPS2-NEXT:    or $14, $1, $11
-; MIPS2-NEXT:    bnez $15, $BB5_7
-; MIPS2-NEXT:    move $11, $24
+; MIPS2-NEXT:    not $1, $12
+; MIPS2-NEXT:    srlv $12, $5, $12
+; MIPS2-NEXT:    sllv $1, $2, $1
+; MIPS2-NEXT:    or $12, $1, $12
+; MIPS2-NEXT:    b $BB5_3
+; MIPS2-NEXT:    move $13, $11
+; MIPS2-NEXT:  $BB5_2:
+; MIPS2-NEXT:    addiu $13, $zero, 0
+; MIPS2-NEXT:    move $12, $11
+; MIPS2-NEXT:  $BB5_3: # %entry
+; MIPS2-NEXT:    not $11, $9
+; MIPS2-NEXT:    bnez $10, $BB5_5
+; MIPS2-NEXT:    srlv $15, $6, $9
 ; MIPS2-NEXT:  # %bb.4: # %entry
+; MIPS2-NEXT:    sll $1, $6, 1
+; MIPS2-NEXT:    srlv $14, $7, $9
+; MIPS2-NEXT:    sllv $1, $1, $11
+; MIPS2-NEXT:    or $25, $1, $14
 ; MIPS2-NEXT:    b $BB5_6
-; MIPS2-NEXT:    nop
+; MIPS2-NEXT:    move $14, $15
 ; MIPS2-NEXT:  $BB5_5:
-; MIPS2-NEXT:    addiu $11, $zero, 0
-; MIPS2-NEXT:    bnez $15, $BB5_7
-; MIPS2-NEXT:    move $14, $24
+; MIPS2-NEXT:    addiu $14, $zero, 0
+; MIPS2-NEXT:    move $25, $15
 ; MIPS2-NEXT:  $BB5_6: # %entry
-; MIPS2-NEXT:    sllv $1, $4, $12
-; MIPS2-NEXT:    not $10, $12
-; MIPS2-NEXT:    srl $12, $5, 1
-; MIPS2-NEXT:    srlv $10, $12, $10
-; MIPS2-NEXT:    or $10, $1, $10
-; MIPS2-NEXT:  $BB5_7: # %entry
-; MIPS2-NEXT:    addiu $15, $2, -64
-; MIPS2-NEXT:    sll $12, $4, 1
-; MIPS2-NEXT:    andi $1, $15, 32
-; MIPS2-NEXT:    bnez $1, $BB5_10
-; MIPS2-NEXT:    srlv $25, $4, $15
-; MIPS2-NEXT:  # %bb.8: # %entry
-; MIPS2-NEXT:    srlv $1, $5, $15
-; MIPS2-NEXT:    not $15, $15
-; MIPS2-NEXT:    sllv $15, $12, $15
-; MIPS2-NEXT:    or $24, $15, $1
-; MIPS2-NEXT:    move $15, $25
-; MIPS2-NEXT:    sltiu $25, $2, 64
-; MIPS2-NEXT:    beqz $25, $BB5_12
-; MIPS2-NEXT:    nop
-; MIPS2-NEXT:  # %bb.9: # %entry
-; MIPS2-NEXT:    b $BB5_11
+; MIPS2-NEXT:    sllv $15, $5, $9
+; MIPS2-NEXT:    beqz $10, $BB5_16
+; MIPS2-NEXT:    addiu $gp, $zero, 0
+; MIPS2-NEXT:  # %bb.7: # %entry
+; MIPS2-NEXT:    sltiu $24, $8, 64
+; MIPS2-NEXT:    bnez $24, $BB5_17
 ; MIPS2-NEXT:    nop
-; MIPS2-NEXT:  $BB5_10:
-; MIPS2-NEXT:    move $24, $25
-; MIPS2-NEXT:    sltiu $25, $2, 64
-; MIPS2-NEXT:    beqz $25, $BB5_12
-; MIPS2-NEXT:    addiu $15, $zero, 0
-; MIPS2-NEXT:  $BB5_11:
-; MIPS2-NEXT:    or $24, $14, $13
-; MIPS2-NEXT:  $BB5_12: # %entry
-; MIPS2-NEXT:    sltiu $13, $2, 1
-; MIPS2-NEXT:    beqz $13, $BB5_19
+; MIPS2-NEXT:  $BB5_8: # %entry
+; MIPS2-NEXT:    beqz $10, $BB5_18
 ; MIPS2-NEXT:    nop
-; MIPS2-NEXT:  # %bb.13: # %entry
-; MIPS2-NEXT:    bnez $25, $BB5_20
+; MIPS2-NEXT:  $BB5_9: # %entry
+; MIPS2-NEXT:    bnez $24, $BB5_19
+; MIPS2-NEXT:    sltiu $25, $8, 1
+; MIPS2-NEXT:  $BB5_10: # %entry
+; MIPS2-NEXT:    beqz $25, $BB5_20
 ; MIPS2-NEXT:    nop
-; MIPS2-NEXT:  $BB5_14: # %entry
-; MIPS2-NEXT:    bnez $13, $BB5_16
-; MIPS2-NEXT:    addiu $10, $zero, 63
-; MIPS2-NEXT:  $BB5_15: # %entry
-; MIPS2-NEXT:    move $6, $15
-; MIPS2-NEXT:  $BB5_16: # %entry
-; MIPS2-NEXT:    sltu $10, $10, $2
-; MIPS2-NEXT:    bnez $8, $BB5_22
-; MIPS2-NEXT:    srlv $11, $4, $2
-; MIPS2-NEXT:  # %bb.17: # %entry
-; MIPS2-NEXT:    srlv $1, $5, $2
-; MIPS2-NEXT:    sllv $2, $12, $9
+; MIPS2-NEXT:  $BB5_11: # %entry
+; MIPS2-NEXT:    bnez $25, $BB5_13
+; MIPS2-NEXT:    addiu $13, $zero, 63
+; MIPS2-NEXT:  $BB5_12: # %entry
+; MIPS2-NEXT:    move $7, $12
+; MIPS2-NEXT:  $BB5_13: # %entry
+; MIPS2-NEXT:    sltu $8, $13, $8
+; MIPS2-NEXT:    bnez $10, $BB5_22
+; MIPS2-NEXT:    srlv $12, $4, $9
+; MIPS2-NEXT:  # %bb.14: # %entry
+; MIPS2-NEXT:    srlv $1, $5, $9
+; MIPS2-NEXT:    sllv $2, $2, $11
 ; MIPS2-NEXT:    or $4, $2, $1
-; MIPS2-NEXT:    move $5, $11
-; MIPS2-NEXT:    bnez $10, $BB5_24
+; MIPS2-NEXT:    move $5, $12
+; MIPS2-NEXT:    bnez $8, $BB5_24
 ; MIPS2-NEXT:    addiu $2, $zero, 0
-; MIPS2-NEXT:  # %bb.18: # %entry
+; MIPS2-NEXT:  # %bb.15: # %entry
 ; MIPS2-NEXT:    b $BB5_23
 ; MIPS2-NEXT:    nop
-; MIPS2-NEXT:  $BB5_19: # %entry
-; MIPS2-NEXT:    beqz $25, $BB5_14
-; MIPS2-NEXT:    move $7, $24
-; MIPS2-NEXT:  $BB5_20:
-; MIPS2-NEXT:    or $15, $11, $10
-; MIPS2-NEXT:    bnez $13, $BB5_16
-; MIPS2-NEXT:    addiu $10, $zero, 63
-; MIPS2-NEXT:  # %bb.21:
-; MIPS2-NEXT:    b $BB5_15
+; MIPS2-NEXT:  $BB5_16: # %entry
+; MIPS2-NEXT:    sltiu $24, $8, 64
+; MIPS2-NEXT:    beqz $24, $BB5_8
+; MIPS2-NEXT:    move $gp, $15
+; MIPS2-NEXT:  $BB5_17:
+; MIPS2-NEXT:    bnez $10, $BB5_9
+; MIPS2-NEXT:    or $12, $25, $gp
+; MIPS2-NEXT:  $BB5_18: # %entry
+; MIPS2-NEXT:    sllv $1, $4, $9
+; MIPS2-NEXT:    srl $15, $5, 1
+; MIPS2-NEXT:    srlv $15, $15, $11
+; MIPS2-NEXT:    or $15, $1, $15
+; MIPS2-NEXT:    beqz $24, $BB5_10
+; MIPS2-NEXT:    sltiu $25, $8, 1
+; MIPS2-NEXT:  $BB5_19:
+; MIPS2-NEXT:    bnez $25, $BB5_11
+; MIPS2-NEXT:    or $13, $14, $15
+; MIPS2-NEXT:  $BB5_20: # %entry
+; MIPS2-NEXT:    move $6, $13
+; MIPS2-NEXT:    bnez $25, $BB5_13
+; MIPS2-NEXT:    addiu $13, $zero, 63
+; MIPS2-NEXT:  # %bb.21: # %entry
+; MIPS2-NEXT:    b $BB5_12
 ; MIPS2-NEXT:    nop
 ; MIPS2-NEXT:  $BB5_22:
 ; MIPS2-NEXT:    addiu $5, $zero, 0
-; MIPS2-NEXT:    move $4, $11
-; MIPS2-NEXT:    bnez $10, $BB5_24
+; MIPS2-NEXT:    move $4, $12
+; MIPS2-NEXT:    bnez $8, $BB5_24
 ; MIPS2-NEXT:    addiu $2, $zero, 0
 ; MIPS2-NEXT:  $BB5_23: # %entry
 ; MIPS2-NEXT:    move $2, $5
 ; MIPS2-NEXT:  $BB5_24: # %entry
-; MIPS2-NEXT:    bnez $10, $BB5_26
+; MIPS2-NEXT:    bnez $8, $BB5_26
 ; MIPS2-NEXT:    nop
 ; MIPS2-NEXT:  # %bb.25: # %entry
 ; MIPS2-NEXT:    move $3, $4
@@ -511,184 +511,180 @@
 ;
 ; MIPS32-LABEL: lshr_i128:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    lw $9, 28($sp)
+; MIPS32-NEXT:    lw $2, 28($sp)
 ; MIPS32-NEXT:    addiu $1, $zero, 64
-; MIPS32-NEXT:    subu $2, $1, $9
-; MIPS32-NEXT:    sllv $10, $5, $2
-; MIPS32-NEXT:    andi $11, $2, 32
-; MIPS32-NEXT:    move $1, $10
-; MIPS32-NEXT:    movn $1, $zero, $11
-; MIPS32-NEXT:    srlv $3, $7, $9
-; MIPS32-NEXT:    not $12, $9
-; MIPS32-NEXT:    sll $8, $6, 1
-; MIPS32-NEXT:    sllv $8, $8, $12
-; MIPS32-NEXT:    or $3, $8, $3
-; MIPS32-NEXT:    srlv $13, $6, $9
-; MIPS32-NEXT:    andi $14, $9, 32
-; MIPS32-NEXT:    movn $3, $13, $14
-; MIPS32-NEXT:    addiu $15, $9, -64
-; MIPS32-NEXT:    or $3, $3, $1
-; MIPS32-NEXT:    srlv $1, $5, $15
+; MIPS32-NEXT:    subu $1, $1, $2
+; MIPS32-NEXT:    andi $3, $1, 63
+; MIPS32-NEXT:    sllv $9, $5, $3
+; MIPS32-NEXT:    andi $10, $1, 32
+; MIPS32-NEXT:    move $1, $9
+; MIPS32-NEXT:    movn $1, $zero, $10
+; MIPS32-NEXT:    srlv $8, $7, $3
+; MIPS32-NEXT:    not $11, $3
+; MIPS32-NEXT:    sll $12, $6, 1
+; MIPS32-NEXT:    sllv $12, $12, $11
+; MIPS32-NEXT:    or $8, $12, $8
+; MIPS32-NEXT:    srlv $12, $6, $3
+; MIPS32-NEXT:    movn $8, $12, $10
+; MIPS32-NEXT:    addiu $13, $2, -64
+; MIPS32-NEXT:    andi $14, $13, 63
+; MIPS32-NEXT:    or $15, $8, $1
+; MIPS32-NEXT:    srlv $1, $5, $14
 ; MIPS32-NEXT:    sll $24, $4, 1
-; MIPS32-NEXT:    not $8, $15
+; MIPS32-NEXT:    not $8, $14
 ; MIPS32-NEXT:    sllv $8, $24, $8
 ; MIPS32-NEXT:    or $1, $8, $1
-; MIPS32-NEXT:    srlv $8, $4, $15
-; MIPS32-NEXT:    andi $15, $15, 32
-; MIPS32-NEXT:    movn $1, $8, $15
-; MIPS32-NEXT:    sltiu $25, $9, 64
-; MIPS32-NEXT:    movn $1, $3, $25
-; MIPS32-NEXT:    sllv $3, $4, $2
-; MIPS32-NEXT:    not $2, $2
-; MIPS32-NEXT:    srl $gp, $5, 1
-; MIPS32-NEXT:    srlv $2, $gp, $2
-; MIPS32-NEXT:    or $gp, $3, $2
-; MIPS32-NEXT:    srlv $2, $5, $9
-; MIPS32-NEXT:    sllv $3, $24, $12
-; MIPS32-NEXT:    or $3, $3, $2
-; MIPS32-NEXT:    srlv $2, $4, $9
-; MIPS32-NEXT:    movn $3, $2, $14
-; MIPS32-NEXT:    movz $1, $7, $9
-; MIPS32-NEXT:    movz $3, $zero, $25
-; MIPS32-NEXT:    movn $gp, $10, $11
-; MIPS32-NEXT:    movn $13, $zero, $14
-; MIPS32-NEXT:    or $4, $13, $gp
-; MIPS32-NEXT:    movn $8, $zero, $15
-; MIPS32-NEXT:    movn $8, $4, $25
-; MIPS32-NEXT:    movz $8, $6, $9
-; MIPS32-NEXT:    movn $2, $zero, $14
-; MIPS32-NEXT:    movz $2, $zero, $25
+; MIPS32-NEXT:    srlv $8, $4, $14
+; MIPS32-NEXT:    andi $13, $13, 32
+; MIPS32-NEXT:    movn $1, $8, $13
+; MIPS32-NEXT:    sltiu $14, $2, 64
+; MIPS32-NEXT:    movn $1, $15, $14
+; MIPS32-NEXT:    movn $12, $zero, $10
+; MIPS32-NEXT:    sllv $15, $4, $3
+; MIPS32-NEXT:    srl $25, $5, 1
+; MIPS32-NEXT:    srlv $25, $25, $11
+; MIPS32-NEXT:    or $15, $15, $25
+; MIPS32-NEXT:    movn $15, $9, $10
+; MIPS32-NEXT:    or $9, $12, $15
+; MIPS32-NEXT:    movn $8, $zero, $13
+; MIPS32-NEXT:    movn $8, $9, $14
+; MIPS32-NEXT:    srlv $4, $4, $3
+; MIPS32-NEXT:    movz $8, $6, $2
+; MIPS32-NEXT:    movz $1, $7, $2
+; MIPS32-NEXT:    move $2, $4
+; MIPS32-NEXT:    movn $2, $zero, $10
+; MIPS32-NEXT:    movz $2, $zero, $14
+; MIPS32-NEXT:    srlv $3, $5, $3
+; MIPS32-NEXT:    sllv $5, $24, $11
+; MIPS32-NEXT:    or $3, $5, $3
+; MIPS32-NEXT:    movn $3, $4, $10
+; MIPS32-NEXT:    movz $3, $zero, $14
 ; MIPS32-NEXT:    move $4, $8
 ; MIPS32-NEXT:    jr $ra
 ; MIPS32-NEXT:    move $5, $1
 ;
 ; MIPS32R2-LABEL: lshr_i128:
 ; MIPS32R2:       # %bb.0: # %entry
-; MIPS32R2-NEXT:    lw $9, 28($sp)
+; MIPS32R2-NEXT:    lw $2, 28($sp)
 ; MIPS32R2-NEXT:    addiu $1, $zero, 64
-; MIPS32R2-NEXT:    subu $2, $1, $9
-; MIPS32R2-NEXT:    sllv $10, $5, $2
-; MIPS32R2-NEXT:    andi $11, $2, 32
-; MIPS32R2-NEXT:    move $1, $10
-; MIPS32R2-NEXT:    movn $1, $zero, $11
-; MIPS32R2-NEXT:    srlv $3, $7, $9
-; MIPS32R2-NEXT:    not $12, $9
-; MIPS32R2-NEXT:    sll $8, $6, 1
-; MIPS32R2-NEXT:    sllv $8, $8, $12
-; MIPS32R2-NEXT:    or $3, $8, $3
-; MIPS32R2-NEXT:    srlv $13, $6, $9
-; MIPS32R2-NEXT:    andi $14, $9, 32
-; MIPS32R2-NEXT:    movn $3, $13, $14
-; MIPS32R2-NEXT:    addiu $15, $9, -64
-; MIPS32R2-NEXT:    or $3, $3, $1
-; MIPS32R2-NEXT:    srlv $1, $5, $15
+; MIPS32R2-NEXT:    subu $1, $1, $2
+; MIPS32R2-NEXT:    andi $3, $1, 63
+; MIPS32R2-NEXT:    sllv $9, $5, $3
+; MIPS32R2-NEXT:    andi $10, $1, 32
+; MIPS32R2-NEXT:    move $1, $9
+; MIPS32R2-NEXT:    movn $1, $zero, $10
+; MIPS32R2-NEXT:    srlv $8, $7, $3
+; MIPS32R2-NEXT:    not $11, $3
+; MIPS32R2-NEXT:    sll $12, $6, 1
+; MIPS32R2-NEXT:    sllv $12, $12, $11
+; MIPS32R2-NEXT:    or $8, $12, $8
+; MIPS32R2-NEXT:    srlv $12, $6, $3
+; MIPS32R2-NEXT:    movn $8, $12, $10
+; MIPS32R2-NEXT:    addiu $13, $2, -64
+; MIPS32R2-NEXT:    andi $14, $13, 63
+; MIPS32R2-NEXT:    or $15, $8, $1
+; MIPS32R2-NEXT:    srlv $1, $5, $14
 ; MIPS32R2-NEXT:    sll $24, $4, 1
-; MIPS32R2-NEXT:    not $8, $15
+; MIPS32R2-NEXT:    not $8, $14
 ; MIPS32R2-NEXT:    sllv $8, $24, $8
 ; MIPS32R2-NEXT:    or $1, $8, $1
-; MIPS32R2-NEXT:    srlv $8, $4, $15
-; MIPS32R2-NEXT:    andi $15, $15, 32
-; MIPS32R2-NEXT:    movn $1, $8, $15
-; MIPS32R2-NEXT:    sltiu $25, $9, 64
-; MIPS32R2-NEXT:    movn $1, $3, $25
-; MIPS32R2-NEXT:    sllv $3, $4, $2
-; MIPS32R2-NEXT:    not $2, $2
-; MIPS32R2-NEXT:    srl $gp, $5, 1
-; MIPS32R2-NEXT:    srlv $2, $gp, $2
-; MIPS32R2-NEXT:    or $gp, $3, $2
-; MIPS32R2-NEXT:    srlv $2, $5, $9
-; MIPS32R2-NEXT:    sllv $3, $24, $12
-; MIPS32R2-NEXT:    or $3, $3, $2
-; MIPS32R2-NEXT:    srlv $2, $4, $9
-; MIPS32R2-NEXT:    movn $3, $2, $14
-; MIPS32R2-NEXT:    movz $1, $7, $9
-; MIPS32R2-NEXT:    movz $3, $zero, $25
-; MIPS32R2-NEXT:    movn $gp, $10, $11
-; MIPS32R2-NEXT:    movn $13, $zero, $14
-; MIPS32R2-NEXT:    or $4, $13, $gp
-; MIPS32R2-NEXT:    movn $8, $zero, $15
-; MIPS32R2-NEXT:    movn $8, $4, $25
-; MIPS32R2-NEXT:    movz $8, $6, $9
-; MIPS32R2-NEXT:    movn $2, $zero, $14
-; MIPS32R2-NEXT:    movz $2, $zero, $25
+; MIPS32R2-NEXT:    srlv $8, $4, $14
+; MIPS32R2-NEXT:    andi $13, $13, 32
+; MIPS32R2-NEXT:    movn $1, $8, $13
+; MIPS32R2-NEXT:    sltiu $14, $2, 64
+; MIPS32R2-NEXT:    movn $1, $15, $14
+; MIPS32R2-NEXT:    movn $12, $zero, $10
+; MIPS32R2-NEXT:    sllv $15, $4, $3
+; MIPS32R2-NEXT:    srl $25, $5, 1
+; MIPS32R2-NEXT:    srlv $25, $25, $11
+; MIPS32R2-NEXT:    or $15, $15, $25
+; MIPS32R2-NEXT:    movn $15, $9, $10
+; MIPS32R2-NEXT:    or $9, $12, $15
+; MIPS32R2-NEXT:    movn $8, $zero, $13
+; MIPS32R2-NEXT:    movn $8, $9, $14
+; MIPS32R2-NEXT:    srlv $4, $4, $3
+; MIPS32R2-NEXT:    movz $8, $6, $2
+; MIPS32R2-NEXT:    movz $1, $7, $2
+; MIPS32R2-NEXT:    move $2, $4
+; MIPS32R2-NEXT:    movn $2, $zero, $10
+; MIPS32R2-NEXT:    movz $2, $zero, $14
+; MIPS32R2-NEXT:    srlv $3, $5, $3
+; MIPS32R2-NEXT:    sllv $5, $24, $11
+; MIPS32R2-NEXT:    or $3, $5, $3
+; MIPS32R2-NEXT:    movn $3, $4, $10
+; MIPS32R2-NEXT:    movz $3, $zero, $14
 ; MIPS32R2-NEXT:    move $4, $8
 ; MIPS32R2-NEXT:    jr $ra
 ; MIPS32R2-NEXT:    move $5, $1
 ;
 ; MIPS32R6-LABEL: lshr_i128:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    addiu $sp, $sp, -8
-; MIPS32R6-NEXT:    .cfi_def_cfa_offset 8
-; MIPS32R6-NEXT:    sw $16, 4($sp) # 4-byte Folded Spill
-; MIPS32R6-NEXT:    .cfi_offset 16, -4
-; MIPS32R6-NEXT:    lw $1, 36($sp)
-; MIPS32R6-NEXT:    srlv $2, $7, $1
-; MIPS32R6-NEXT:    not $3, $1
-; MIPS32R6-NEXT:    sll $8, $6, 1
-; MIPS32R6-NEXT:    sllv $8, $8, $3
-; MIPS32R6-NEXT:    or $2, $8, $2
-; MIPS32R6-NEXT:    addiu $8, $1, -64
-; MIPS32R6-NEXT:    srlv $9, $5, $8
-; MIPS32R6-NEXT:    sll $10, $4, 1
-; MIPS32R6-NEXT:    not $11, $8
-; MIPS32R6-NEXT:    sllv $11, $10, $11
-; MIPS32R6-NEXT:    andi $12, $1, 32
-; MIPS32R6-NEXT:    seleqz $2, $2, $12
+; MIPS32R6-NEXT:    lw $1, 28($sp)
+; MIPS32R6-NEXT:    addiu $2, $zero, 64
+; MIPS32R6-NEXT:    subu $2, $2, $1
+; MIPS32R6-NEXT:    andi $3, $2, 63
+; MIPS32R6-NEXT:    srlv $8, $4, $3
+; MIPS32R6-NEXT:    andi $2, $2, 32
+; MIPS32R6-NEXT:    srlv $9, $7, $3
+; MIPS32R6-NEXT:    not $10, $3
+; MIPS32R6-NEXT:    sll $11, $6, 1
+; MIPS32R6-NEXT:    sllv $11, $11, $10
 ; MIPS32R6-NEXT:    or $9, $11, $9
-; MIPS32R6-NEXT:    srlv $11, $6, $1
-; MIPS32R6-NEXT:    selnez $13, $11, $12
-; MIPS32R6-NEXT:    addiu $14, $zero, 64
-; MIPS32R6-NEXT:    subu $14, $14, $1
-; MIPS32R6-NEXT:    sllv $15, $5, $14
-; MIPS32R6-NEXT:    andi $24, $14, 32
-; MIPS32R6-NEXT:    andi $25, $8, 32
-; MIPS32R6-NEXT:    seleqz $9, $9, $25
-; MIPS32R6-NEXT:    seleqz $gp, $15, $24
-; MIPS32R6-NEXT:    or $2, $13, $2
-; MIPS32R6-NEXT:    selnez $13, $15, $24
-; MIPS32R6-NEXT:    sllv $15, $4, $14
-; MIPS32R6-NEXT:    not $14, $14
-; MIPS32R6-NEXT:    srl $16, $5, 1
-; MIPS32R6-NEXT:    srlv $14, $16, $14
-; MIPS32R6-NEXT:    or $14, $15, $14
-; MIPS32R6-NEXT:    seleqz $14, $14, $24
-; MIPS32R6-NEXT:    srlv $8, $4, $8
-; MIPS32R6-NEXT:    or $13, $13, $14
-; MIPS32R6-NEXT:    or $2, $2, $gp
-; MIPS32R6-NEXT:    srlv $5, $5, $1
-; MIPS32R6-NEXT:    selnez $14, $8, $25
-; MIPS32R6-NEXT:    sltiu $15, $1, 64
-; MIPS32R6-NEXT:    selnez $2, $2, $15
-; MIPS32R6-NEXT:    or $9, $14, $9
-; MIPS32R6-NEXT:    sllv $3, $10, $3
-; MIPS32R6-NEXT:    seleqz $10, $11, $12
-; MIPS32R6-NEXT:    or $10, $10, $13
-; MIPS32R6-NEXT:    or $3, $3, $5
-; MIPS32R6-NEXT:    seleqz $5, $9, $15
-; MIPS32R6-NEXT:    seleqz $9, $zero, $15
-; MIPS32R6-NEXT:    srlv $4, $4, $1
-; MIPS32R6-NEXT:    seleqz $11, $4, $12
-; MIPS32R6-NEXT:    selnez $11, $11, $15
+; MIPS32R6-NEXT:    selnez $11, $8, $2
+; MIPS32R6-NEXT:    srlv $12, $5, $3
+; MIPS32R6-NEXT:    sll $13, $4, 1
+; MIPS32R6-NEXT:    sllv $14, $13, $10
+; MIPS32R6-NEXT:    or $12, $14, $12
+; MIPS32R6-NEXT:    seleqz $12, $12, $2
+; MIPS32R6-NEXT:    sllv $14, $5, $3
+; MIPS32R6-NEXT:    srlv $15, $6, $3
+; MIPS32R6-NEXT:    or $11, $11, $12
+; MIPS32R6-NEXT:    selnez $12, $15, $2
+; MIPS32R6-NEXT:    seleqz $9, $9, $2
+; MIPS32R6-NEXT:    selnez $24, $14, $2
+; MIPS32R6-NEXT:    sllv $3, $4, $3
+; MIPS32R6-NEXT:    srl $25, $5, 1
+; MIPS32R6-NEXT:    srlv $10, $25, $10
+; MIPS32R6-NEXT:    or $3, $3, $10
+; MIPS32R6-NEXT:    seleqz $3, $3, $2
+; MIPS32R6-NEXT:    sltiu $10, $1, 64
+; MIPS32R6-NEXT:    or $24, $24, $3
+; MIPS32R6-NEXT:    selnez $3, $11, $10
+; MIPS32R6-NEXT:    or $9, $12, $9
+; MIPS32R6-NEXT:    seleqz $11, $15, $2
+; MIPS32R6-NEXT:    seleqz $8, $8, $2
+; MIPS32R6-NEXT:    seleqz $2, $14, $2
+; MIPS32R6-NEXT:    seleqz $12, $zero, $10
+; MIPS32R6-NEXT:    addiu $14, $1, -64
+; MIPS32R6-NEXT:    seleqz $6, $6, $1
+; MIPS32R6-NEXT:    andi $15, $14, 63
+; MIPS32R6-NEXT:    or $2, $9, $2
+; MIPS32R6-NEXT:    or $3, $12, $3
+; MIPS32R6-NEXT:    selnez $8, $8, $10
 ; MIPS32R6-NEXT:    seleqz $7, $7, $1
+; MIPS32R6-NEXT:    or $9, $11, $24
+; MIPS32R6-NEXT:    selnez $9, $9, $10
+; MIPS32R6-NEXT:    srlv $11, $4, $15
+; MIPS32R6-NEXT:    andi $14, $14, 32
+; MIPS32R6-NEXT:    seleqz $4, $11, $14
+; MIPS32R6-NEXT:    seleqz $4, $4, $10
+; MIPS32R6-NEXT:    or $4, $9, $4
+; MIPS32R6-NEXT:    selnez $4, $4, $1
+; MIPS32R6-NEXT:    or $4, $6, $4
+; MIPS32R6-NEXT:    selnez $2, $2, $10
+; MIPS32R6-NEXT:    srlv $5, $5, $15
+; MIPS32R6-NEXT:    not $6, $15
+; MIPS32R6-NEXT:    sllv $6, $13, $6
+; MIPS32R6-NEXT:    or $5, $6, $5
+; MIPS32R6-NEXT:    seleqz $5, $5, $14
+; MIPS32R6-NEXT:    selnez $6, $11, $14
+; MIPS32R6-NEXT:    or $5, $6, $5
+; MIPS32R6-NEXT:    seleqz $5, $5, $10
 ; MIPS32R6-NEXT:    or $2, $2, $5
-; MIPS32R6-NEXT:    selnez $2, $2, $1
-; MIPS32R6-NEXT:    or $5, $7, $2
-; MIPS32R6-NEXT:    or $2, $9, $11
-; MIPS32R6-NEXT:    seleqz $3, $3, $12
-; MIPS32R6-NEXT:    selnez $7, $4, $12
-; MIPS32R6-NEXT:    seleqz $4, $6, $1
-; MIPS32R6-NEXT:    selnez $6, $10, $15
-; MIPS32R6-NEXT:    seleqz $8, $8, $25
-; MIPS32R6-NEXT:    seleqz $8, $8, $15
-; MIPS32R6-NEXT:    or $6, $6, $8
-; MIPS32R6-NEXT:    selnez $1, $6, $1
-; MIPS32R6-NEXT:    or $4, $4, $1
-; MIPS32R6-NEXT:    or $1, $7, $3
-; MIPS32R6-NEXT:    selnez $1, $1, $15
-; MIPS32R6-NEXT:    or $3, $9, $1
-; MIPS32R6-NEXT:    lw $16, 4($sp) # 4-byte Folded Reload
+; MIPS32R6-NEXT:    selnez $1, $2, $1
+; MIPS32R6-NEXT:    or $5, $7, $1
 ; MIPS32R6-NEXT:    jr $ra
-; MIPS32R6-NEXT:    addiu $sp, $sp, 8
+; MIPS32R6-NEXT:    or $2, $12, $8
 ;
 ; MIPS3-LABEL: lshr_i128:
 ; MIPS3:       # %bb.0: # %entry
@@ -770,183 +766,172 @@
 ;
 ; MMR3-LABEL: lshr_i128:
 ; MMR3:       # %bb.0: # %entry
-; MMR3-NEXT:    addiusp -40
-; MMR3-NEXT:    .cfi_def_cfa_offset 40
-; MMR3-NEXT:    swp $16, 32($sp)
+; MMR3-NEXT:    addiusp -48
+; MMR3-NEXT:    .cfi_def_cfa_offset 48
+; MMR3-NEXT:    swp $16, 40($sp)
 ; MMR3-NEXT:    .cfi_offset 17, -4
 ; MMR3-NEXT:    .cfi_offset 16, -8
 ; MMR3-NEXT:    move $8, $7
-; MMR3-NEXT:    sw $6, 24($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    sw $4, 28($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    lw $16, 68($sp)
+; MMR3-NEXT:    move $17, $6
+; MMR3-NEXT:    sw $6, 20($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    sw $4, 32($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    lw $3, 76($sp)
+; MMR3-NEXT:    sw $3, 24($sp) # 4-byte Folded Spill
 ; MMR3-NEXT:    li16 $2, 64
-; MMR3-NEXT:    subu16 $7, $2, $16
-; MMR3-NEXT:    sllv $9, $5, $7
-; MMR3-NEXT:    move $17, $5
-; MMR3-NEXT:    sw $5, 0($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    andi16 $3, $7, 32
-; MMR3-NEXT:    sw $3, 20($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    subu16 $2, $2, $3
+; MMR3-NEXT:    andi16 $6, $2, 63
+; MMR3-NEXT:    sllv $9, $5, $6
+; MMR3-NEXT:    sw $5, 28($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    andi16 $3, $2, 32
 ; MMR3-NEXT:    li16 $2, 0
-; MMR3-NEXT:    move $4, $9
-; MMR3-NEXT:    movn $4, $2, $3
-; MMR3-NEXT:    srlv $5, $8, $16
-; MMR3-NEXT:    not16 $3, $16
-; MMR3-NEXT:    sw $3, 16($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    sll16 $2, $6, 1
-; MMR3-NEXT:    sllv $2, $2, $3
-; MMR3-NEXT:    or16 $2, $5
-; MMR3-NEXT:    srlv $5, $6, $16
-; MMR3-NEXT:    sw $5, 4($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    andi16 $3, $16, 32
+; MMR3-NEXT:    move $16, $9
+; MMR3-NEXT:    movn $16, $2, $3
 ; MMR3-NEXT:    sw $3, 12($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    movn $2, $5, $3
-; MMR3-NEXT:    addiu $3, $16, -64
-; MMR3-NEXT:    or16 $2, $4
-; MMR3-NEXT:    srlv $4, $17, $3
-; MMR3-NEXT:    sw $4, 8($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    lw $4, 28($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    sll16 $6, $4, 1
-; MMR3-NEXT:    not16 $5, $3
-; MMR3-NEXT:    sllv $5, $6, $5
-; MMR3-NEXT:    lw $17, 8($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    or16 $5, $17
-; MMR3-NEXT:    srlv $1, $4, $3
-; MMR3-NEXT:    andi16 $3, $3, 32
+; MMR3-NEXT:    srlv $7, $7, $6
+; MMR3-NEXT:    not16 $4, $6
+; MMR3-NEXT:    sw $4, 36($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    sll16 $2, $17, 1
+; MMR3-NEXT:    sllv $2, $2, $4
+; MMR3-NEXT:    or16 $2, $7
+; MMR3-NEXT:    srlv $7, $17, $6
+; MMR3-NEXT:    movn $2, $7, $3
+; MMR3-NEXT:    lw $17, 24($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    addiu $3, $17, -64
 ; MMR3-NEXT:    sw $3, 8($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    movn $5, $1, $3
-; MMR3-NEXT:    sltiu $10, $16, 64
-; MMR3-NEXT:    movn $5, $2, $10
-; MMR3-NEXT:    sllv $2, $4, $7
-; MMR3-NEXT:    not16 $3, $7
-; MMR3-NEXT:    lw $7, 0($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    srl16 $4, $7, 1
-; MMR3-NEXT:    srlv $4, $4, $3
-; MMR3-NEXT:    or16 $4, $2
-; MMR3-NEXT:    srlv $2, $7, $16
-; MMR3-NEXT:    lw $3, 16($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    sllv $3, $6, $3
-; MMR3-NEXT:    or16 $3, $2
-; MMR3-NEXT:    lw $2, 28($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    srlv $2, $2, $16
-; MMR3-NEXT:    lw $17, 12($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    movn $3, $2, $17
-; MMR3-NEXT:    movz $5, $8, $16
-; MMR3-NEXT:    li16 $6, 0
-; MMR3-NEXT:    movz $3, $6, $10
-; MMR3-NEXT:    lw $7, 20($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    movn $4, $9, $7
-; MMR3-NEXT:    lw $6, 4($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    li16 $7, 0
-; MMR3-NEXT:    movn $6, $7, $17
-; MMR3-NEXT:    or16 $6, $4
+; MMR3-NEXT:    andi16 $4, $3, 63
+; MMR3-NEXT:    or16 $2, $16
+; MMR3-NEXT:    srlv $3, $5, $4
+; MMR3-NEXT:    sw $3, 4($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    lw $3, 32($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    sll16 $5, $3, 1
+; MMR3-NEXT:    sw $5, 16($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    not16 $16, $4
+; MMR3-NEXT:    sllv $16, $5, $16
+; MMR3-NEXT:    lw $5, 4($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    or16 $16, $5
+; MMR3-NEXT:    srlv $1, $3, $4
 ; MMR3-NEXT:    lw $4, 8($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    movn $1, $7, $4
-; MMR3-NEXT:    li16 $7, 0
-; MMR3-NEXT:    movn $1, $6, $10
-; MMR3-NEXT:    lw $4, 24($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    movz $1, $4, $16
-; MMR3-NEXT:    movn $2, $7, $17
-; MMR3-NEXT:    li16 $4, 0
-; MMR3-NEXT:    movz $2, $4, $10
+; MMR3-NEXT:    andi16 $5, $4, 32
+; MMR3-NEXT:    movn $16, $1, $5
+; MMR3-NEXT:    sltiu $10, $17, 64
+; MMR3-NEXT:    movn $16, $2, $10
+; MMR3-NEXT:    lw $17, 12($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    li16 $2, 0
+; MMR3-NEXT:    movn $7, $2, $17
+; MMR3-NEXT:    sllv $2, $3, $6
+; MMR3-NEXT:    lw $3, 28($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    srl16 $3, $3, 1
+; MMR3-NEXT:    lw $4, 36($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    srlv $3, $3, $4
+; MMR3-NEXT:    or16 $3, $2
+; MMR3-NEXT:    movn $3, $9, $17
+; MMR3-NEXT:    or16 $3, $7
+; MMR3-NEXT:    li16 $2, 0
+; MMR3-NEXT:    movn $1, $2, $5
+; MMR3-NEXT:    movn $1, $3, $10
+; MMR3-NEXT:    lw $2, 32($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    srlv $9, $2, $6
+; MMR3-NEXT:    lw $2, 24($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    lw $3, 20($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    movz $1, $3, $2
+; MMR3-NEXT:    movz $16, $8, $2
+; MMR3-NEXT:    move $2, $9
+; MMR3-NEXT:    li16 $3, 0
+; MMR3-NEXT:    movn $2, $3, $17
+; MMR3-NEXT:    movz $2, $3, $10
+; MMR3-NEXT:    li16 $5, 0
+; MMR3-NEXT:    lw $3, 28($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    srlv $4, $3, $6
+; MMR3-NEXT:    lw $3, 36($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    lw $6, 16($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    sllv $3, $6, $3
+; MMR3-NEXT:    or16 $3, $4
+; MMR3-NEXT:    movn $3, $9, $17
+; MMR3-NEXT:    movz $3, $5, $10
 ; MMR3-NEXT:    move $4, $1
-; MMR3-NEXT:    lwp $16, 32($sp)
-; MMR3-NEXT:    addiusp 40
+; MMR3-NEXT:    move $5, $16
+; MMR3-NEXT:    lwp $16, 40($sp)
+; MMR3-NEXT:    addiusp 48
 ; MMR3-NEXT:    jrc $ra
 ;
 ; MMR6-LABEL: lshr_i128:
 ; MMR6:       # %bb.0: # %entry
-; MMR6-NEXT:    addiu $sp, $sp, -32
-; MMR6-NEXT:    .cfi_def_cfa_offset 32
-; MMR6-NEXT:    sw $17, 28($sp) # 4-byte Folded Spill
-; MMR6-NEXT:    sw $16, 24($sp) # 4-byte Folded Spill
+; MMR6-NEXT:    addiu $sp, $sp, -8
+; MMR6-NEXT:    .cfi_def_cfa_offset 8
+; MMR6-NEXT:    sw $17, 4($sp) # 4-byte Folded Spill
+; MMR6-NEXT:    sw $16, 0($sp) # 4-byte Folded Spill
 ; MMR6-NEXT:    .cfi_offset 17, -4
 ; MMR6-NEXT:    .cfi_offset 16, -8
 ; MMR6-NEXT:    move $1, $7
-; MMR6-NEXT:    move $7, $5
-; MMR6-NEXT:    lw $3, 60($sp)
-; MMR6-NEXT:    srlv $2, $1, $3
-; MMR6-NEXT:    not16 $5, $3
-; MMR6-NEXT:    sw $5, 12($sp) # 4-byte Folded Spill
-; MMR6-NEXT:    move $17, $6
-; MMR6-NEXT:    sw $6, 16($sp) # 4-byte Folded Spill
-; MMR6-NEXT:    sll16 $6, $6, 1
-; MMR6-NEXT:    sllv $6, $6, $5
-; MMR6-NEXT:    or $8, $6, $2
-; MMR6-NEXT:    addiu $5, $3, -64
-; MMR6-NEXT:    srlv $9, $7, $5
-; MMR6-NEXT:    move $6, $4
-; MMR6-NEXT:    sll16 $2, $4, 1
-; MMR6-NEXT:    sw $2, 8($sp) # 4-byte Folded Spill
-; MMR6-NEXT:    not16 $16, $5
-; MMR6-NEXT:    sllv $10, $2, $16
-; MMR6-NEXT:    andi16 $16, $3, 32
-; MMR6-NEXT:    seleqz $8, $8, $16
+; MMR6-NEXT:    lw $7, 36($sp)
+; MMR6-NEXT:    li16 $2, 64
+; MMR6-NEXT:    subu16 $2, $2, $7
+; MMR6-NEXT:    andi16 $17, $2, 63
+; MMR6-NEXT:    srlv $8, $4, $17
+; MMR6-NEXT:    andi16 $3, $2, 32
+; MMR6-NEXT:    srlv $9, $1, $17
+; MMR6-NEXT:    not16 $2, $17
+; MMR6-NEXT:    sll16 $16, $6, 1
+; MMR6-NEXT:    sllv $10, $16, $2
 ; MMR6-NEXT:    or $9, $10, $9
-; MMR6-NEXT:    srlv $10, $17, $3
-; MMR6-NEXT:    selnez $11, $10, $16
-; MMR6-NEXT:    li16 $17, 64
-; MMR6-NEXT:    subu16 $2, $17, $3
-; MMR6-NEXT:    sllv $12, $7, $2
-; MMR6-NEXT:    move $17, $7
-; MMR6-NEXT:    andi16 $4, $2, 32
-; MMR6-NEXT:    andi16 $7, $5, 32
-; MMR6-NEXT:    sw $7, 20($sp) # 4-byte Folded Spill
-; MMR6-NEXT:    seleqz $9, $9, $7
-; MMR6-NEXT:    seleqz $13, $12, $4
-; MMR6-NEXT:    or $8, $11, $8
-; MMR6-NEXT:    selnez $11, $12, $4
-; MMR6-NEXT:    sllv $12, $6, $2
-; MMR6-NEXT:    move $7, $6
-; MMR6-NEXT:    sw $6, 4($sp) # 4-byte Folded Spill
-; MMR6-NEXT:    not16 $2, $2
-; MMR6-NEXT:    srl16 $6, $17, 1
-; MMR6-NEXT:    srlv $2, $6, $2
-; MMR6-NEXT:    or $2, $12, $2
-; MMR6-NEXT:    seleqz $2, $2, $4
-; MMR6-NEXT:    srlv $4, $7, $5
-; MMR6-NEXT:    or $11, $11, $2
-; MMR6-NEXT:    or $5, $8, $13
-; MMR6-NEXT:    srlv $6, $17, $3
-; MMR6-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
-; MMR6-NEXT:    selnez $7, $4, $2
-; MMR6-NEXT:    sltiu $8, $3, 64
-; MMR6-NEXT:    selnez $12, $5, $8
-; MMR6-NEXT:    or $7, $7, $9
-; MMR6-NEXT:    lw $5, 12($sp) # 4-byte Folded Reload
-; MMR6-NEXT:    lw $2, 8($sp) # 4-byte Folded Reload
-; MMR6-NEXT:    sllv $9, $2, $5
-; MMR6-NEXT:    seleqz $10, $10, $16
-; MMR6-NEXT:    li16 $5, 0
+; MMR6-NEXT:    selnez $10, $8, $3
+; MMR6-NEXT:    srlv $11, $5, $17
+; MMR6-NEXT:    sll16 $16, $4, 1
+; MMR6-NEXT:    sllv $12, $16, $2
+; MMR6-NEXT:    or $11, $12, $11
+; MMR6-NEXT:    seleqz $11, $11, $3
+; MMR6-NEXT:    sllv $12, $5, $17
+; MMR6-NEXT:    srlv $13, $6, $17
 ; MMR6-NEXT:    or $10, $10, $11
-; MMR6-NEXT:    or $6, $9, $6
-; MMR6-NEXT:    seleqz $2, $7, $8
-; MMR6-NEXT:    seleqz $7, $5, $8
-; MMR6-NEXT:    lw $5, 4($sp) # 4-byte Folded Reload
-; MMR6-NEXT:    srlv $9, $5, $3
-; MMR6-NEXT:    seleqz $11, $9, $16
-; MMR6-NEXT:    selnez $11, $11, $8
-; MMR6-NEXT:    seleqz $1, $1, $3
-; MMR6-NEXT:    or $2, $12, $2
-; MMR6-NEXT:    selnez $2, $2, $3
-; MMR6-NEXT:    or $5, $1, $2
-; MMR6-NEXT:    or $2, $7, $11
-; MMR6-NEXT:    seleqz $1, $6, $16
-; MMR6-NEXT:    selnez $6, $9, $16
-; MMR6-NEXT:    lw $16, 16($sp) # 4-byte Folded Reload
-; MMR6-NEXT:    seleqz $9, $16, $3
-; MMR6-NEXT:    selnez $10, $10, $8
-; MMR6-NEXT:    lw $16, 20($sp) # 4-byte Folded Reload
-; MMR6-NEXT:    seleqz $4, $4, $16
-; MMR6-NEXT:    seleqz $4, $4, $8
+; MMR6-NEXT:    selnez $11, $13, $3
+; MMR6-NEXT:    seleqz $9, $9, $3
+; MMR6-NEXT:    selnez $14, $12, $3
+; MMR6-NEXT:    sllv $15, $4, $17
+; MMR6-NEXT:    srl16 $17, $5, 1
+; MMR6-NEXT:    srlv $2, $17, $2
+; MMR6-NEXT:    or $2, $15, $2
+; MMR6-NEXT:    seleqz $2, $2, $3
+; MMR6-NEXT:    sltiu $15, $7, 64
+; MMR6-NEXT:    or $14, $14, $2
+; MMR6-NEXT:    selnez $10, $10, $15
+; MMR6-NEXT:    or $9, $11, $9
+; MMR6-NEXT:    seleqz $11, $13, $3
+; MMR6-NEXT:    seleqz $8, $8, $3
+; MMR6-NEXT:    seleqz $3, $12, $3
+; MMR6-NEXT:    li16 $17, 0
+; MMR6-NEXT:    seleqz $12, $17, $15
+; MMR6-NEXT:    addiu $17, $7, -64
+; MMR6-NEXT:    seleqz $6, $6, $7
+; MMR6-NEXT:    andi16 $2, $17, 63
+; MMR6-NEXT:    or $9, $9, $3
+; MMR6-NEXT:    or $3, $12, $10
+; MMR6-NEXT:    selnez $8, $8, $15
+; MMR6-NEXT:    seleqz $1, $1, $7
+; MMR6-NEXT:    or $10, $11, $14
+; MMR6-NEXT:    selnez $10, $10, $15
+; MMR6-NEXT:    srlv $11, $4, $2
+; MMR6-NEXT:    andi16 $17, $17, 32
+; MMR6-NEXT:    seleqz $4, $11, $17
+; MMR6-NEXT:    seleqz $4, $4, $15
 ; MMR6-NEXT:    or $4, $10, $4
-; MMR6-NEXT:    selnez $3, $4, $3
-; MMR6-NEXT:    or $4, $9, $3
-; MMR6-NEXT:    or $1, $6, $1
-; MMR6-NEXT:    selnez $1, $1, $8
-; MMR6-NEXT:    or $3, $7, $1
-; MMR6-NEXT:    lw $16, 24($sp) # 4-byte Folded Reload
-; MMR6-NEXT:    lw $17, 28($sp) # 4-byte Folded Reload
-; MMR6-NEXT:    addiu $sp, $sp, 32
+; MMR6-NEXT:    selnez $4, $4, $7
+; MMR6-NEXT:    or $4, $6, $4
+; MMR6-NEXT:    selnez $6, $9, $15
+; MMR6-NEXT:    srlv $5, $5, $2
+; MMR6-NEXT:    not16 $2, $2
+; MMR6-NEXT:    sllv $2, $16, $2
+; MMR6-NEXT:    or $2, $2, $5
+; MMR6-NEXT:    seleqz $2, $2, $17
+; MMR6-NEXT:    selnez $5, $11, $17
+; MMR6-NEXT:    or $2, $5, $2
+; MMR6-NEXT:    seleqz $2, $2, $15
+; MMR6-NEXT:    or $2, $6, $2
+; MMR6-NEXT:    selnez $2, $2, $7
+; MMR6-NEXT:    or $5, $1, $2
+; MMR6-NEXT:    or $2, $12, $8
+; MMR6-NEXT:    lw $16, 0($sp) # 4-byte Folded Reload
+; MMR6-NEXT:    lw $17, 4($sp) # 4-byte Folded Reload
+; MMR6-NEXT:    addiu $sp, $sp, 8
 ; MMR6-NEXT:    jrc $ra
 entry:
 
diff --git a/llvm/test/CodeGen/Mips/llvm-ir/shl.ll b/llvm/test/CodeGen/Mips/llvm-ir/shl.ll
--- a/llvm/test/CodeGen/Mips/llvm-ir/shl.ll
+++ b/llvm/test/CodeGen/Mips/llvm-ir/shl.ll
@@ -458,305 +458,310 @@
 ; MIPS2:       # %bb.0: # %entry
 ; MIPS2-NEXT:    addiu $sp, $sp, -8
 ; MIPS2-NEXT:    .cfi_def_cfa_offset 8
-; MIPS2-NEXT:    sw $17, 4($sp) # 4-byte Folded Spill
-; MIPS2-NEXT:    sw $16, 0($sp) # 4-byte Folded Spill
-; MIPS2-NEXT:    .cfi_offset 17, -4
-; MIPS2-NEXT:    .cfi_offset 16, -8
-; MIPS2-NEXT:    lw $8, 36($sp)
+; MIPS2-NEXT:    sw $16, 4($sp) # 4-byte Folded Spill
+; MIPS2-NEXT:    .cfi_offset 16, -4
+; MIPS2-NEXT:    lw $10, 36($sp)
 ; MIPS2-NEXT:    addiu $1, $zero, 64
-; MIPS2-NEXT:    subu $3, $1, $8
-; MIPS2-NEXT:    srlv $9, $6, $3
-; MIPS2-NEXT:    andi $1, $3, 32
-; MIPS2-NEXT:    bnez $1, $BB5_2
-; MIPS2-NEXT:    addiu $2, $zero, 0
+; MIPS2-NEXT:    subu $1, $1, $10
+; MIPS2-NEXT:    andi $2, $1, 63
+; MIPS2-NEXT:    not $3, $2
+; MIPS2-NEXT:    srlv $13, $6, $2
+; MIPS2-NEXT:    andi $9, $1, 32
+; MIPS2-NEXT:    bnez $9, $BB5_2
+; MIPS2-NEXT:    addiu $8, $zero, 0
 ; MIPS2-NEXT:  # %bb.1: # %entry
-; MIPS2-NEXT:    srlv $1, $7, $3
-; MIPS2-NEXT:    not $3, $3
-; MIPS2-NEXT:    sll $10, $6, 1
-; MIPS2-NEXT:    sllv $3, $10, $3
-; MIPS2-NEXT:    or $3, $3, $1
+; MIPS2-NEXT:    sll $1, $6, 1
+; MIPS2-NEXT:    srlv $11, $7, $2
+; MIPS2-NEXT:    sllv $1, $1, $3
+; MIPS2-NEXT:    or $12, $1, $11
 ; MIPS2-NEXT:    b $BB5_3
-; MIPS2-NEXT:    move $15, $9
+; MIPS2-NEXT:    move $11, $13
 ; MIPS2-NEXT:  $BB5_2:
-; MIPS2-NEXT:    addiu $15, $zero, 0
-; MIPS2-NEXT:    move $3, $9
+; MIPS2-NEXT:    addiu $11, $zero, 0
+; MIPS2-NEXT:    move $12, $13
 ; MIPS2-NEXT:  $BB5_3: # %entry
-; MIPS2-NEXT:    not $13, $8
-; MIPS2-NEXT:    sllv $9, $5, $8
-; MIPS2-NEXT:    andi $10, $8, 32
-; MIPS2-NEXT:    bnez $10, $BB5_5
-; MIPS2-NEXT:    move $25, $9
+; MIPS2-NEXT:    sllv $13, $5, $2
+; MIPS2-NEXT:    bnez $9, $BB5_5
+; MIPS2-NEXT:    addiu $24, $zero, 0
 ; MIPS2-NEXT:  # %bb.4: # %entry
-; MIPS2-NEXT:    sllv $1, $4, $8
-; MIPS2-NEXT:    srl $11, $5, 1
-; MIPS2-NEXT:    srlv $11, $11, $13
-; MIPS2-NEXT:    or $25, $1, $11
+; MIPS2-NEXT:    move $24, $13
 ; MIPS2-NEXT:  $BB5_5: # %entry
-; MIPS2-NEXT:    addiu $14, $8, -64
-; MIPS2-NEXT:    srl $24, $7, 1
-; MIPS2-NEXT:    sllv $11, $7, $14
-; MIPS2-NEXT:    andi $12, $14, 32
-; MIPS2-NEXT:    bnez $12, $BB5_7
-; MIPS2-NEXT:    move $gp, $11
+; MIPS2-NEXT:    bnez $9, $BB5_7
+; MIPS2-NEXT:    nop
 ; MIPS2-NEXT:  # %bb.6: # %entry
-; MIPS2-NEXT:    sllv $1, $6, $14
-; MIPS2-NEXT:    not $14, $14
-; MIPS2-NEXT:    srlv $14, $24, $14
-; MIPS2-NEXT:    or $gp, $1, $14
+; MIPS2-NEXT:    srl $1, $5, 1
+; MIPS2-NEXT:    sllv $13, $4, $2
+; MIPS2-NEXT:    srlv $1, $1, $3
+; MIPS2-NEXT:    or $13, $13, $1
 ; MIPS2-NEXT:  $BB5_7: # %entry
-; MIPS2-NEXT:    sltiu $14, $8, 64
-; MIPS2-NEXT:    beqz $14, $BB5_9
+; MIPS2-NEXT:    addiu $1, $10, -64
+; MIPS2-NEXT:    andi $25, $1, 63
+; MIPS2-NEXT:    sllv $15, $7, $25
+; MIPS2-NEXT:    andi $16, $1, 32
+; MIPS2-NEXT:    beqz $16, $BB5_20
+; MIPS2-NEXT:    addiu $14, $zero, 0
+; MIPS2-NEXT:  # %bb.8: # %entry
+; MIPS2-NEXT:    sltiu $gp, $10, 64
+; MIPS2-NEXT:    bnez $gp, $BB5_21
 ; MIPS2-NEXT:    nop
-; MIPS2-NEXT:  # %bb.8:
-; MIPS2-NEXT:    or $gp, $25, $15
 ; MIPS2-NEXT:  $BB5_9: # %entry
-; MIPS2-NEXT:    sllv $25, $7, $8
-; MIPS2-NEXT:    bnez $10, $BB5_11
-; MIPS2-NEXT:    addiu $17, $zero, 0
-; MIPS2-NEXT:  # %bb.10: # %entry
-; MIPS2-NEXT:    move $17, $25
-; MIPS2-NEXT:  $BB5_11: # %entry
-; MIPS2-NEXT:    addiu $1, $zero, 63
-; MIPS2-NEXT:    sltiu $15, $8, 1
-; MIPS2-NEXT:    beqz $15, $BB5_21
-; MIPS2-NEXT:    sltu $16, $1, $8
-; MIPS2-NEXT:  # %bb.12: # %entry
 ; MIPS2-NEXT:    beqz $16, $BB5_22
-; MIPS2-NEXT:    addiu $7, $zero, 0
-; MIPS2-NEXT:  $BB5_13: # %entry
-; MIPS2-NEXT:    beqz $10, $BB5_23
+; MIPS2-NEXT:    srl $12, $7, 1
+; MIPS2-NEXT:  $BB5_10: # %entry
+; MIPS2-NEXT:    bnez $gp, $BB5_23
+; MIPS2-NEXT:    sltiu $24, $10, 1
+; MIPS2-NEXT:  $BB5_11: # %entry
+; MIPS2-NEXT:    beqz $24, $BB5_24
 ; MIPS2-NEXT:    nop
+; MIPS2-NEXT:  $BB5_12: # %entry
+; MIPS2-NEXT:    bnez $24, $BB5_14
+; MIPS2-NEXT:    addiu $11, $zero, 63
+; MIPS2-NEXT:  $BB5_13: # %entry
+; MIPS2-NEXT:    move $5, $14
 ; MIPS2-NEXT:  $BB5_14: # %entry
-; MIPS2-NEXT:    beqz $16, $BB5_24
-; MIPS2-NEXT:    addiu $6, $zero, 0
-; MIPS2-NEXT:  $BB5_15: # %entry
-; MIPS2-NEXT:    beqz $10, $BB5_25
-; MIPS2-NEXT:    addiu $8, $zero, 0
+; MIPS2-NEXT:    sltu $10, $11, $10
+; MIPS2-NEXT:    sllv $11, $7, $2
+; MIPS2-NEXT:    beqz $9, $BB5_26
+; MIPS2-NEXT:    addiu $13, $zero, 0
+; MIPS2-NEXT:  # %bb.15: # %entry
+; MIPS2-NEXT:    beqz $10, $BB5_27
+; MIPS2-NEXT:    addiu $7, $zero, 0
 ; MIPS2-NEXT:  $BB5_16: # %entry
-; MIPS2-NEXT:    beqz $12, $BB5_26
+; MIPS2-NEXT:    beqz $9, $BB5_28
 ; MIPS2-NEXT:    nop
 ; MIPS2-NEXT:  $BB5_17: # %entry
-; MIPS2-NEXT:    bnez $14, $BB5_27
+; MIPS2-NEXT:    bnez $10, $BB5_19
 ; MIPS2-NEXT:    nop
 ; MIPS2-NEXT:  $BB5_18: # %entry
-; MIPS2-NEXT:    bnez $15, $BB5_20
-; MIPS2-NEXT:    nop
+; MIPS2-NEXT:    move $8, $11
 ; MIPS2-NEXT:  $BB5_19: # %entry
-; MIPS2-NEXT:    move $5, $2
-; MIPS2-NEXT:  $BB5_20: # %entry
 ; MIPS2-NEXT:    move $2, $4
 ; MIPS2-NEXT:    move $3, $5
-; MIPS2-NEXT:    move $4, $6
+; MIPS2-NEXT:    move $4, $8
 ; MIPS2-NEXT:    move $5, $7
-; MIPS2-NEXT:    lw $16, 0($sp) # 4-byte Folded Reload
-; MIPS2-NEXT:    lw $17, 4($sp) # 4-byte Folded Reload
+; MIPS2-NEXT:    lw $16, 4($sp) # 4-byte Folded Reload
 ; MIPS2-NEXT:    jr $ra
 ; MIPS2-NEXT:    addiu $sp, $sp, 8
-; MIPS2-NEXT:  $BB5_21: # %entry
-; MIPS2-NEXT:    move $4, $gp
-; MIPS2-NEXT:    bnez $16, $BB5_13
-; MIPS2-NEXT:    addiu $7, $zero, 0
+; MIPS2-NEXT:  $BB5_20: # %entry
+; MIPS2-NEXT:    sltiu $gp, $10, 64
+; MIPS2-NEXT:    beqz $gp, $BB5_9
+; MIPS2-NEXT:    move $14, $15
+; MIPS2-NEXT:  $BB5_21:
+; MIPS2-NEXT:    or $14, $24, $12
+; MIPS2-NEXT:    bnez $16, $BB5_10
+; MIPS2-NEXT:    srl $12, $7, 1
 ; MIPS2-NEXT:  $BB5_22: # %entry
-; MIPS2-NEXT:    bnez $10, $BB5_14
-; MIPS2-NEXT:    move $7, $17
-; MIPS2-NEXT:  $BB5_23: # %entry
-; MIPS2-NEXT:    sllv $1, $6, $8
-; MIPS2-NEXT:    srlv $6, $24, $13
-; MIPS2-NEXT:    or $25, $1, $6
-; MIPS2-NEXT:    bnez $16, $BB5_15
-; MIPS2-NEXT:    addiu $6, $zero, 0
+; MIPS2-NEXT:    sllv $1, $6, $25
+; MIPS2-NEXT:    not $15, $25
+; MIPS2-NEXT:    srlv $15, $12, $15
+; MIPS2-NEXT:    or $15, $1, $15
+; MIPS2-NEXT:    beqz $gp, $BB5_11
+; MIPS2-NEXT:    sltiu $24, $10, 1
+; MIPS2-NEXT:  $BB5_23:
+; MIPS2-NEXT:    bnez $24, $BB5_12
+; MIPS2-NEXT:    or $15, $13, $11
 ; MIPS2-NEXT:  $BB5_24: # %entry
-; MIPS2-NEXT:    move $6, $25
-; MIPS2-NEXT:    bnez $10, $BB5_16
-; MIPS2-NEXT:    addiu $8, $zero, 0
-; MIPS2-NEXT:  $BB5_25: # %entry
-; MIPS2-NEXT:    bnez $12, $BB5_17
-; MIPS2-NEXT:    move $8, $9
+; MIPS2-NEXT:    move $4, $15
+; MIPS2-NEXT:    bnez $24, $BB5_14
+; MIPS2-NEXT:    addiu $11, $zero, 63
+; MIPS2-NEXT:  # %bb.25: # %entry
+; MIPS2-NEXT:    b $BB5_13
+; MIPS2-NEXT:    nop
 ; MIPS2-NEXT:  $BB5_26: # %entry
-; MIPS2-NEXT:    beqz $14, $BB5_18
-; MIPS2-NEXT:    move $2, $11
-; MIPS2-NEXT:  $BB5_27:
-; MIPS2-NEXT:    bnez $15, $BB5_20
-; MIPS2-NEXT:    or $2, $8, $3
-; MIPS2-NEXT:  # %bb.28:
-; MIPS2-NEXT:    b $BB5_19
+; MIPS2-NEXT:    move $13, $11
+; MIPS2-NEXT:    bnez $10, $BB5_16
+; MIPS2-NEXT:    addiu $7, $zero, 0
+; MIPS2-NEXT:  $BB5_27: # %entry
+; MIPS2-NEXT:    bnez $9, $BB5_17
+; MIPS2-NEXT:    move $7, $13
+; MIPS2-NEXT:  $BB5_28: # %entry
+; MIPS2-NEXT:    sllv $1, $6, $2
+; MIPS2-NEXT:    srlv $2, $12, $3
+; MIPS2-NEXT:    bnez $10, $BB5_19
+; MIPS2-NEXT:    or $11, $1, $2
+; MIPS2-NEXT:  # %bb.29: # %entry
+; MIPS2-NEXT:    b $BB5_18
 ; MIPS2-NEXT:    nop
 ;
 ; MIPS32-LABEL: shl_i128:
 ; MIPS32:       # %bb.0: # %entry
-; MIPS32-NEXT:    lw $8, 28($sp)
-; MIPS32-NEXT:    addiu $1, $zero, 64
-; MIPS32-NEXT:    subu $1, $1, $8
-; MIPS32-NEXT:    srlv $9, $6, $1
-; MIPS32-NEXT:    andi $10, $1, 32
-; MIPS32-NEXT:    move $2, $9
-; MIPS32-NEXT:    movn $2, $zero, $10
-; MIPS32-NEXT:    sllv $3, $4, $8
-; MIPS32-NEXT:    not $11, $8
-; MIPS32-NEXT:    srl $12, $5, 1
-; MIPS32-NEXT:    srlv $12, $12, $11
-; MIPS32-NEXT:    or $3, $3, $12
-; MIPS32-NEXT:    sllv $12, $5, $8
-; MIPS32-NEXT:    andi $13, $8, 32
-; MIPS32-NEXT:    movn $3, $12, $13
-; MIPS32-NEXT:    addiu $14, $8, -64
-; MIPS32-NEXT:    or $15, $3, $2
-; MIPS32-NEXT:    sllv $2, $6, $14
-; MIPS32-NEXT:    srl $24, $7, 1
-; MIPS32-NEXT:    not $3, $14
-; MIPS32-NEXT:    srlv $3, $24, $3
-; MIPS32-NEXT:    or $2, $2, $3
-; MIPS32-NEXT:    sllv $3, $7, $14
-; MIPS32-NEXT:    andi $14, $14, 32
-; MIPS32-NEXT:    movn $2, $3, $14
-; MIPS32-NEXT:    sltiu $25, $8, 64
-; MIPS32-NEXT:    movn $2, $15, $25
-; MIPS32-NEXT:    srlv $15, $7, $1
-; MIPS32-NEXT:    not $1, $1
-; MIPS32-NEXT:    sll $gp, $6, 1
-; MIPS32-NEXT:    sllv $1, $gp, $1
-; MIPS32-NEXT:    or $15, $1, $15
+; MIPS32-NEXT:    lw $1, 28($sp)
+; MIPS32-NEXT:    addiu $2, $zero, 64
+; MIPS32-NEXT:    subu $2, $2, $1
+; MIPS32-NEXT:    andi $8, $2, 63
+; MIPS32-NEXT:    sllv $3, $5, $8
+; MIPS32-NEXT:    andi $9, $2, 32
+; MIPS32-NEXT:    sllv $2, $4, $8
+; MIPS32-NEXT:    not $10, $8
+; MIPS32-NEXT:    srl $11, $5, 1
+; MIPS32-NEXT:    srlv $11, $11, $10
+; MIPS32-NEXT:    or $2, $2, $11
+; MIPS32-NEXT:    movn $2, $3, $9
+; MIPS32-NEXT:    movn $3, $zero, $9
+; MIPS32-NEXT:    srlv $11, $7, $8
+; MIPS32-NEXT:    sll $12, $6, 1
+; MIPS32-NEXT:    sllv $12, $12, $10
+; MIPS32-NEXT:    or $11, $12, $11
+; MIPS32-NEXT:    srlv $12, $6, $8
+; MIPS32-NEXT:    movn $11, $12, $9
+; MIPS32-NEXT:    or $11, $3, $11
+; MIPS32-NEXT:    movn $12, $zero, $9
+; MIPS32-NEXT:    addiu $3, $1, -64
+; MIPS32-NEXT:    andi $13, $3, 63
+; MIPS32-NEXT:    sllv $14, $7, $13
+; MIPS32-NEXT:    andi $15, $3, 32
+; MIPS32-NEXT:    move $3, $14
+; MIPS32-NEXT:    movn $3, $zero, $15
+; MIPS32-NEXT:    sltiu $24, $1, 64
+; MIPS32-NEXT:    movn $3, $11, $24
+; MIPS32-NEXT:    or $11, $2, $12
+; MIPS32-NEXT:    sllv $2, $6, $13
+; MIPS32-NEXT:    srl $12, $7, 1
+; MIPS32-NEXT:    not $13, $13
+; MIPS32-NEXT:    srlv $13, $12, $13
+; MIPS32-NEXT:    or $2, $2, $13
+; MIPS32-NEXT:    movn $2, $14, $15
+; MIPS32-NEXT:    movn $2, $11, $24
+; MIPS32-NEXT:    sllv $7, $7, $8
+; MIPS32-NEXT:    movz $2, $4, $1
+; MIPS32-NEXT:    movz $3, $5, $1
+; MIPS32-NEXT:    move $5, $7
+; MIPS32-NEXT:    movn $5, $zero, $9
+; MIPS32-NEXT:    movz $5, $zero, $24
 ; MIPS32-NEXT:    sllv $1, $6, $8
-; MIPS32-NEXT:    srlv $6, $24, $11
-; MIPS32-NEXT:    or $1, $1, $6
-; MIPS32-NEXT:    sllv $6, $7, $8
-; MIPS32-NEXT:    movn $1, $6, $13
-; MIPS32-NEXT:    movz $2, $4, $8
-; MIPS32-NEXT:    movz $1, $zero, $25
-; MIPS32-NEXT:    movn $15, $9, $10
-; MIPS32-NEXT:    movn $12, $zero, $13
-; MIPS32-NEXT:    or $4, $12, $15
-; MIPS32-NEXT:    movn $3, $zero, $14
-; MIPS32-NEXT:    movn $3, $4, $25
-; MIPS32-NEXT:    movz $3, $5, $8
-; MIPS32-NEXT:    movn $6, $zero, $13
-; MIPS32-NEXT:    movz $6, $zero, $25
-; MIPS32-NEXT:    move $4, $1
+; MIPS32-NEXT:    srlv $4, $12, $10
+; MIPS32-NEXT:    or $4, $1, $4
+; MIPS32-NEXT:    movn $4, $7, $9
 ; MIPS32-NEXT:    jr $ra
-; MIPS32-NEXT:    move $5, $6
+; MIPS32-NEXT:    movz $4, $zero, $24
 ;
 ; MIPS32R2-LABEL: shl_i128:
 ; MIPS32R2:       # %bb.0: # %entry
-; MIPS32R2-NEXT:    lw $8, 28($sp)
-; MIPS32R2-NEXT:    addiu $1, $zero, 64
-; MIPS32R2-NEXT:    subu $1, $1, $8
-; MIPS32R2-NEXT:    srlv $9, $6, $1
-; MIPS32R2-NEXT:    andi $10, $1, 32
-; MIPS32R2-NEXT:    move $2, $9
-; MIPS32R2-NEXT:    movn $2, $zero, $10
-; MIPS32R2-NEXT:    sllv $3, $4, $8
-; MIPS32R2-NEXT:    not $11, $8
-; MIPS32R2-NEXT:    srl $12, $5, 1
-; MIPS32R2-NEXT:    srlv $12, $12, $11
-; MIPS32R2-NEXT:    or $3, $3, $12
-; MIPS32R2-NEXT:    sllv $12, $5, $8
-; MIPS32R2-NEXT:    andi $13, $8, 32
-; MIPS32R2-NEXT:    movn $3, $12, $13
-; MIPS32R2-NEXT:    addiu $14, $8, -64
-; MIPS32R2-NEXT:    or $15, $3, $2
-; MIPS32R2-NEXT:    sllv $2, $6, $14
-; MIPS32R2-NEXT:    srl $24, $7, 1
-; MIPS32R2-NEXT:    not $3, $14
-; MIPS32R2-NEXT:    srlv $3, $24, $3
-; MIPS32R2-NEXT:    or $2, $2, $3
-; MIPS32R2-NEXT:    sllv $3, $7, $14
-; MIPS32R2-NEXT:    andi $14, $14, 32
-; MIPS32R2-NEXT:    movn $2, $3, $14
-; MIPS32R2-NEXT:    sltiu $25, $8, 64
-; MIPS32R2-NEXT:    movn $2, $15, $25
-; MIPS32R2-NEXT:    srlv $15, $7, $1
-; MIPS32R2-NEXT:    not $1, $1
-; MIPS32R2-NEXT:    sll $gp, $6, 1
-; MIPS32R2-NEXT:    sllv $1, $gp, $1
-; MIPS32R2-NEXT:    or $15, $1, $15
+; MIPS32R2-NEXT:    lw $1, 28($sp)
+; MIPS32R2-NEXT:    addiu $2, $zero, 64
+; MIPS32R2-NEXT:    subu $2, $2, $1
+; MIPS32R2-NEXT:    andi $8, $2, 63
+; MIPS32R2-NEXT:    sllv $3, $5, $8
+; MIPS32R2-NEXT:    andi $9, $2, 32
+; MIPS32R2-NEXT:    sllv $2, $4, $8
+; MIPS32R2-NEXT:    not $10, $8
+; MIPS32R2-NEXT:    srl $11, $5, 1
+; MIPS32R2-NEXT:    srlv $11, $11, $10
+; MIPS32R2-NEXT:    or $2, $2, $11
+; MIPS32R2-NEXT:    movn $2, $3, $9
+; MIPS32R2-NEXT:    movn $3, $zero, $9
+; MIPS32R2-NEXT:    srlv $11, $7, $8
+; MIPS32R2-NEXT:    sll $12, $6, 1
+; MIPS32R2-NEXT:    sllv $12, $12, $10
+; MIPS32R2-NEXT:    or $11, $12, $11
+; MIPS32R2-NEXT:    srlv $12, $6, $8
+; MIPS32R2-NEXT:    movn $11, $12, $9
+; MIPS32R2-NEXT:    or $11, $3, $11
+; MIPS32R2-NEXT:    movn $12, $zero, $9
+; MIPS32R2-NEXT:    addiu $3, $1, -64
+; MIPS32R2-NEXT:    andi $13, $3, 63
+; MIPS32R2-NEXT:    sllv $14, $7, $13
+; MIPS32R2-NEXT:    andi $15, $3, 32
+; MIPS32R2-NEXT:    move $3, $14
+; MIPS32R2-NEXT:    movn $3, $zero, $15
+; MIPS32R2-NEXT:    sltiu $24, $1, 64
+; MIPS32R2-NEXT:    movn $3, $11, $24
+; MIPS32R2-NEXT:    or $11, $2, $12
+; MIPS32R2-NEXT:    sllv $2, $6, $13
+; MIPS32R2-NEXT:    srl $12, $7, 1
+; MIPS32R2-NEXT:    not $13, $13
+; MIPS32R2-NEXT:    srlv $13, $12, $13
+; MIPS32R2-NEXT:    or $2, $2, $13
+; MIPS32R2-NEXT:    movn $2, $14, $15
+; MIPS32R2-NEXT:    movn $2, $11, $24
+; MIPS32R2-NEXT:    sllv $7, $7, $8
+; MIPS32R2-NEXT:    movz $2, $4, $1
+; MIPS32R2-NEXT:    movz $3, $5, $1
+; MIPS32R2-NEXT:    move $5, $7
+; MIPS32R2-NEXT:    movn $5, $zero, $9
+; MIPS32R2-NEXT:    movz $5, $zero, $24
 ; MIPS32R2-NEXT:    sllv $1, $6, $8
-; MIPS32R2-NEXT:    srlv $6, $24, $11
-; MIPS32R2-NEXT:    or $1, $1, $6
-; MIPS32R2-NEXT:    sllv $6, $7, $8
-; MIPS32R2-NEXT:    movn $1, $6, $13
-; MIPS32R2-NEXT:    movz $2, $4, $8
-; MIPS32R2-NEXT:    movz $1, $zero, $25
-; MIPS32R2-NEXT:    movn $15, $9, $10
-; MIPS32R2-NEXT:    movn $12, $zero, $13
-; MIPS32R2-NEXT:    or $4, $12, $15
-; MIPS32R2-NEXT:    movn $3, $zero, $14
-; MIPS32R2-NEXT:    movn $3, $4, $25
-; MIPS32R2-NEXT:    movz $3, $5, $8
-; MIPS32R2-NEXT:    movn $6, $zero, $13
-; MIPS32R2-NEXT:    movz $6, $zero, $25
-; MIPS32R2-NEXT:    move $4, $1
+; MIPS32R2-NEXT:    srlv $4, $12, $10
+; MIPS32R2-NEXT:    or $4, $1, $4
+; MIPS32R2-NEXT:    movn $4, $7, $9
 ; MIPS32R2-NEXT:    jr $ra
-; MIPS32R2-NEXT:    move $5, $6
+; MIPS32R2-NEXT:    movz $4, $zero, $24
 ;
 ; MIPS32R6-LABEL: shl_i128:
 ; MIPS32R6:       # %bb.0: # %entry
-; MIPS32R6-NEXT:    lw $3, 28($sp)
-; MIPS32R6-NEXT:    sllv $1, $4, $3
-; MIPS32R6-NEXT:    not $2, $3
-; MIPS32R6-NEXT:    srl $8, $5, 1
-; MIPS32R6-NEXT:    srlv $8, $8, $2
-; MIPS32R6-NEXT:    or $1, $1, $8
-; MIPS32R6-NEXT:    sllv $8, $5, $3
-; MIPS32R6-NEXT:    andi $9, $3, 32
-; MIPS32R6-NEXT:    seleqz $1, $1, $9
-; MIPS32R6-NEXT:    selnez $10, $8, $9
-; MIPS32R6-NEXT:    addiu $11, $zero, 64
-; MIPS32R6-NEXT:    subu $11, $11, $3
-; MIPS32R6-NEXT:    srlv $12, $6, $11
-; MIPS32R6-NEXT:    andi $13, $11, 32
-; MIPS32R6-NEXT:    seleqz $14, $12, $13
-; MIPS32R6-NEXT:    or $1, $10, $1
-; MIPS32R6-NEXT:    selnez $10, $12, $13
-; MIPS32R6-NEXT:    srlv $12, $7, $11
-; MIPS32R6-NEXT:    not $11, $11
-; MIPS32R6-NEXT:    sll $15, $6, 1
-; MIPS32R6-NEXT:    sllv $11, $15, $11
-; MIPS32R6-NEXT:    or $11, $11, $12
-; MIPS32R6-NEXT:    seleqz $11, $11, $13
-; MIPS32R6-NEXT:    addiu $12, $3, -64
-; MIPS32R6-NEXT:    or $10, $10, $11
-; MIPS32R6-NEXT:    or $1, $1, $14
-; MIPS32R6-NEXT:    sllv $11, $6, $12
-; MIPS32R6-NEXT:    srl $13, $7, 1
-; MIPS32R6-NEXT:    not $14, $12
-; MIPS32R6-NEXT:    srlv $14, $13, $14
+; MIPS32R6-NEXT:    addiu $sp, $sp, -8
+; MIPS32R6-NEXT:    .cfi_def_cfa_offset 8
+; MIPS32R6-NEXT:    sw $16, 4($sp) # 4-byte Folded Spill
+; MIPS32R6-NEXT:    .cfi_offset 16, -4
+; MIPS32R6-NEXT:    lw $3, 36($sp)
+; MIPS32R6-NEXT:    addiu $1, $zero, 64
+; MIPS32R6-NEXT:    subu $1, $1, $3
+; MIPS32R6-NEXT:    andi $2, $1, 63
+; MIPS32R6-NEXT:    sllv $8, $6, $2
+; MIPS32R6-NEXT:    not $9, $2
+; MIPS32R6-NEXT:    srl $10, $7, 1
+; MIPS32R6-NEXT:    srlv $11, $10, $9
+; MIPS32R6-NEXT:    sllv $12, $7, $2
+; MIPS32R6-NEXT:    andi $1, $1, 32
+; MIPS32R6-NEXT:    selnez $13, $12, $1
+; MIPS32R6-NEXT:    or $8, $8, $11
+; MIPS32R6-NEXT:    seleqz $8, $8, $1
+; MIPS32R6-NEXT:    sllv $11, $4, $2
+; MIPS32R6-NEXT:    srl $14, $5, 1
+; MIPS32R6-NEXT:    srlv $14, $14, $9
+; MIPS32R6-NEXT:    or $8, $13, $8
 ; MIPS32R6-NEXT:    or $11, $11, $14
-; MIPS32R6-NEXT:    andi $14, $12, 32
-; MIPS32R6-NEXT:    seleqz $11, $11, $14
-; MIPS32R6-NEXT:    sllv $12, $7, $12
-; MIPS32R6-NEXT:    selnez $15, $12, $14
+; MIPS32R6-NEXT:    srlv $13, $7, $2
+; MIPS32R6-NEXT:    sll $14, $6, 1
+; MIPS32R6-NEXT:    sllv $9, $14, $9
+; MIPS32R6-NEXT:    seleqz $11, $11, $1
+; MIPS32R6-NEXT:    or $9, $9, $13
+; MIPS32R6-NEXT:    srlv $13, $6, $2
+; MIPS32R6-NEXT:    addiu $14, $3, -64
+; MIPS32R6-NEXT:    andi $15, $14, 63
 ; MIPS32R6-NEXT:    sltiu $24, $3, 64
-; MIPS32R6-NEXT:    selnez $1, $1, $24
-; MIPS32R6-NEXT:    or $11, $15, $11
-; MIPS32R6-NEXT:    sllv $6, $6, $3
-; MIPS32R6-NEXT:    srlv $2, $13, $2
-; MIPS32R6-NEXT:    seleqz $8, $8, $9
-; MIPS32R6-NEXT:    or $8, $8, $10
-; MIPS32R6-NEXT:    or $6, $6, $2
-; MIPS32R6-NEXT:    seleqz $2, $11, $24
+; MIPS32R6-NEXT:    selnez $8, $8, $24
+; MIPS32R6-NEXT:    selnez $25, $13, $1
+; MIPS32R6-NEXT:    seleqz $9, $9, $1
+; MIPS32R6-NEXT:    sllv $2, $5, $2
+; MIPS32R6-NEXT:    selnez $gp, $2, $1
+; MIPS32R6-NEXT:    seleqz $12, $12, $1
+; MIPS32R6-NEXT:    not $16, $15
+; MIPS32R6-NEXT:    or $11, $gp, $11
+; MIPS32R6-NEXT:    seleqz $13, $13, $1
+; MIPS32R6-NEXT:    or $9, $25, $9
+; MIPS32R6-NEXT:    seleqz $1, $2, $1
+; MIPS32R6-NEXT:    sllv $2, $6, $15
+; MIPS32R6-NEXT:    srlv $6, $10, $16
 ; MIPS32R6-NEXT:    seleqz $10, $zero, $24
-; MIPS32R6-NEXT:    sllv $7, $7, $3
-; MIPS32R6-NEXT:    seleqz $11, $7, $9
-; MIPS32R6-NEXT:    selnez $11, $11, $24
-; MIPS32R6-NEXT:    seleqz $4, $4, $3
-; MIPS32R6-NEXT:    or $1, $1, $2
-; MIPS32R6-NEXT:    selnez $1, $1, $3
-; MIPS32R6-NEXT:    or $2, $4, $1
-; MIPS32R6-NEXT:    or $1, $10, $11
-; MIPS32R6-NEXT:    seleqz $4, $6, $9
-; MIPS32R6-NEXT:    selnez $6, $7, $9
+; MIPS32R6-NEXT:    or $9, $1, $9
+; MIPS32R6-NEXT:    or $1, $10, $8
+; MIPS32R6-NEXT:    selnez $8, $12, $24
 ; MIPS32R6-NEXT:    seleqz $5, $5, $3
-; MIPS32R6-NEXT:    selnez $7, $8, $24
-; MIPS32R6-NEXT:    seleqz $8, $12, $14
-; MIPS32R6-NEXT:    seleqz $8, $8, $24
-; MIPS32R6-NEXT:    or $7, $7, $8
-; MIPS32R6-NEXT:    selnez $3, $7, $3
+; MIPS32R6-NEXT:    seleqz $4, $4, $3
+; MIPS32R6-NEXT:    or $11, $11, $13
+; MIPS32R6-NEXT:    selnez $11, $11, $24
+; MIPS32R6-NEXT:    or $2, $2, $6
+; MIPS32R6-NEXT:    andi $6, $14, 32
+; MIPS32R6-NEXT:    seleqz $2, $2, $6
+; MIPS32R6-NEXT:    sllv $7, $7, $15
+; MIPS32R6-NEXT:    selnez $12, $7, $6
+; MIPS32R6-NEXT:    or $2, $12, $2
+; MIPS32R6-NEXT:    seleqz $2, $2, $24
+; MIPS32R6-NEXT:    or $2, $11, $2
+; MIPS32R6-NEXT:    selnez $2, $2, $3
+; MIPS32R6-NEXT:    or $2, $4, $2
+; MIPS32R6-NEXT:    selnez $4, $9, $24
+; MIPS32R6-NEXT:    seleqz $6, $7, $6
+; MIPS32R6-NEXT:    seleqz $6, $6, $24
+; MIPS32R6-NEXT:    or $4, $4, $6
+; MIPS32R6-NEXT:    selnez $3, $4, $3
 ; MIPS32R6-NEXT:    or $3, $5, $3
-; MIPS32R6-NEXT:    or $4, $6, $4
-; MIPS32R6-NEXT:    selnez $4, $4, $24
-; MIPS32R6-NEXT:    or $4, $10, $4
+; MIPS32R6-NEXT:    or $5, $10, $8
+; MIPS32R6-NEXT:    move $4, $1
+; MIPS32R6-NEXT:    lw $16, 4($sp) # 4-byte Folded Reload
 ; MIPS32R6-NEXT:    jr $ra
-; MIPS32R6-NEXT:    move $5, $1
+; MIPS32R6-NEXT:    addiu $sp, $sp, 8
 ;
 ; MIPS3-LABEL: shl_i128:
 ; MIPS3:       # %bb.0: # %entry
@@ -849,81 +854,83 @@
 ; MMR3-NEXT:    swp $16, 32($sp)
 ; MMR3-NEXT:    .cfi_offset 17, -4
 ; MMR3-NEXT:    .cfi_offset 16, -8
-; MMR3-NEXT:    move $17, $7
-; MMR3-NEXT:    sw $7, 4($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    move $7, $6
+; MMR3-NEXT:    sw $7, 24($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    sw $6, 28($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    sw $5, 12($sp) # 4-byte Folded Spill
 ; MMR3-NEXT:    move $1, $4
-; MMR3-NEXT:    lw $16, 68($sp)
-; MMR3-NEXT:    li16 $2, 64
-; MMR3-NEXT:    subu16 $6, $2, $16
-; MMR3-NEXT:    srlv $9, $7, $6
-; MMR3-NEXT:    andi16 $4, $6, 32
-; MMR3-NEXT:    sw $4, 24($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    li16 $3, 0
-; MMR3-NEXT:    move $2, $9
-; MMR3-NEXT:    movn $2, $3, $4
-; MMR3-NEXT:    sllv $3, $1, $16
+; MMR3-NEXT:    lw $3, 68($sp)
 ; MMR3-NEXT:    sw $3, 16($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    not16 $4, $16
-; MMR3-NEXT:    sw $4, 20($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    sw $5, 28($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    srl16 $3, $5, 1
-; MMR3-NEXT:    srlv $3, $3, $4
-; MMR3-NEXT:    lw $4, 16($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    or16 $3, $4
-; MMR3-NEXT:    sllv $5, $5, $16
-; MMR3-NEXT:    sw $5, 8($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    andi16 $4, $16, 32
-; MMR3-NEXT:    sw $4, 16($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    movn $3, $5, $4
-; MMR3-NEXT:    addiu $4, $16, -64
-; MMR3-NEXT:    or16 $3, $2
-; MMR3-NEXT:    sllv $2, $7, $4
-; MMR3-NEXT:    sw $2, 12($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    srl16 $5, $17, 1
-; MMR3-NEXT:    not16 $2, $4
-; MMR3-NEXT:    srlv $2, $5, $2
-; MMR3-NEXT:    lw $17, 12($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    or16 $2, $17
-; MMR3-NEXT:    lw $17, 4($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    sllv $8, $17, $4
-; MMR3-NEXT:    andi16 $4, $4, 32
-; MMR3-NEXT:    sw $4, 12($sp) # 4-byte Folded Spill
-; MMR3-NEXT:    movn $2, $8, $4
-; MMR3-NEXT:    sltiu $10, $16, 64
-; MMR3-NEXT:    movn $2, $3, $10
-; MMR3-NEXT:    srlv $4, $17, $6
-; MMR3-NEXT:    not16 $3, $6
-; MMR3-NEXT:    sll16 $6, $7, 1
-; MMR3-NEXT:    sllv $3, $6, $3
-; MMR3-NEXT:    or16 $3, $4
-; MMR3-NEXT:    sllv $6, $7, $16
-; MMR3-NEXT:    lw $4, 20($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    srlv $4, $5, $4
-; MMR3-NEXT:    or16 $4, $6
-; MMR3-NEXT:    sllv $6, $17, $16
-; MMR3-NEXT:    lw $17, 16($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    movn $4, $6, $17
-; MMR3-NEXT:    movz $2, $1, $16
+; MMR3-NEXT:    li16 $2, 64
+; MMR3-NEXT:    subu16 $2, $2, $3
+; MMR3-NEXT:    andi16 $4, $2, 63
+; MMR3-NEXT:    sllv $3, $5, $4
+; MMR3-NEXT:    andi16 $2, $2, 32
+; MMR3-NEXT:    sw $2, 20($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    sllv $6, $1, $4
+; MMR3-NEXT:    not16 $7, $4
+; MMR3-NEXT:    move $17, $4
+; MMR3-NEXT:    sw $4, 4($sp) # 4-byte Folded Spill
+; MMR3-NEXT:    srl16 $2, $5, 1
+; MMR3-NEXT:    srlv $16, $2, $7
+; MMR3-NEXT:    move $4, $7
+; MMR3-NEXT:    sw $7, 8($sp) # 4-byte Folded Spill
 ; MMR3-NEXT:    li16 $5, 0
-; MMR3-NEXT:    movz $4, $5, $10
-; MMR3-NEXT:    lw $7, 24($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    movn $3, $9, $7
-; MMR3-NEXT:    lw $5, 8($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    li16 $7, 0
-; MMR3-NEXT:    movn $5, $7, $17
-; MMR3-NEXT:    or16 $5, $3
-; MMR3-NEXT:    lw $3, 12($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    movn $8, $7, $3
-; MMR3-NEXT:    li16 $7, 0
-; MMR3-NEXT:    movn $8, $5, $10
-; MMR3-NEXT:    lw $3, 28($sp) # 4-byte Folded Reload
-; MMR3-NEXT:    movz $8, $3, $16
-; MMR3-NEXT:    movn $6, $7, $17
+; MMR3-NEXT:    or16 $16, $6
+; MMR3-NEXT:    lw $2, 20($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    movn $16, $3, $2
+; MMR3-NEXT:    movn $3, $5, $2
+; MMR3-NEXT:    lw $5, 24($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    srlv $7, $5, $17
+; MMR3-NEXT:    lw $5, 28($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    sll16 $6, $5, 1
+; MMR3-NEXT:    sllv $6, $6, $4
+; MMR3-NEXT:    or16 $6, $7
+; MMR3-NEXT:    srlv $4, $5, $17
+; MMR3-NEXT:    movn $6, $4, $2
+; MMR3-NEXT:    or16 $6, $3
 ; MMR3-NEXT:    li16 $3, 0
-; MMR3-NEXT:    movz $6, $3, $10
+; MMR3-NEXT:    movn $4, $3, $2
+; MMR3-NEXT:    lw $2, 16($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    addiu $3, $2, -64
+; MMR3-NEXT:    andi16 $7, $3, 63
+; MMR3-NEXT:    lw $5, 24($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    sllv $8, $5, $7
+; MMR3-NEXT:    andi16 $5, $3, 32
 ; MMR3-NEXT:    move $3, $8
-; MMR3-NEXT:    move $5, $6
+; MMR3-NEXT:    li16 $17, 0
+; MMR3-NEXT:    movn $3, $17, $5
+; MMR3-NEXT:    sltiu $9, $2, 64
+; MMR3-NEXT:    movn $3, $6, $9
+; MMR3-NEXT:    or16 $4, $16
+; MMR3-NEXT:    lw $2, 28($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    sllv $17, $2, $7
+; MMR3-NEXT:    lw $2, 24($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    srl16 $16, $2, 1
+; MMR3-NEXT:    move $6, $2
+; MMR3-NEXT:    not16 $2, $7
+; MMR3-NEXT:    srlv $2, $16, $2
+; MMR3-NEXT:    or16 $2, $17
+; MMR3-NEXT:    movn $2, $8, $5
+; MMR3-NEXT:    movn $2, $4, $9
+; MMR3-NEXT:    lw $17, 4($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    sllv $8, $6, $17
+; MMR3-NEXT:    lw $4, 16($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    movz $2, $1, $4
+; MMR3-NEXT:    lw $5, 12($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    movz $3, $5, $4
+; MMR3-NEXT:    move $5, $8
+; MMR3-NEXT:    li16 $4, 0
+; MMR3-NEXT:    lw $6, 20($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    movn $5, $4, $6
+; MMR3-NEXT:    movz $5, $4, $9
+; MMR3-NEXT:    lw $4, 28($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    sllv $7, $4, $17
+; MMR3-NEXT:    lw $4, 8($sp) # 4-byte Folded Reload
+; MMR3-NEXT:    srlv $4, $16, $4
+; MMR3-NEXT:    or16 $4, $7
+; MMR3-NEXT:    movn $4, $8, $6
+; MMR3-NEXT:    li16 $6, 0
+; MMR3-NEXT:    movz $4, $6, $9
 ; MMR3-NEXT:    lwp $16, 32($sp)
 ; MMR3-NEXT:    addiusp 40
 ; MMR3-NEXT:    jrc $ra
@@ -936,76 +943,76 @@
 ; MMR6-NEXT:    sw $16, 8($sp) # 4-byte Folded Spill
 ; MMR6-NEXT:    .cfi_offset 17, -4
 ; MMR6-NEXT:    .cfi_offset 16, -8
-; MMR6-NEXT:    move $11, $4
+; MMR6-NEXT:    move $8, $4
 ; MMR6-NEXT:    lw $3, 44($sp)
-; MMR6-NEXT:    sllv $1, $4, $3
-; MMR6-NEXT:    not16 $2, $3
-; MMR6-NEXT:    sw $2, 4($sp) # 4-byte Folded Spill
-; MMR6-NEXT:    srl16 $16, $5, 1
-; MMR6-NEXT:    srlv $8, $16, $2
-; MMR6-NEXT:    or $1, $1, $8
-; MMR6-NEXT:    sllv $8, $5, $3
-; MMR6-NEXT:    andi16 $16, $3, 32
+; MMR6-NEXT:    li16 $2, 64
+; MMR6-NEXT:    subu16 $2, $2, $3
+; MMR6-NEXT:    andi16 $17, $2, 63
+; MMR6-NEXT:    sllv $1, $6, $17
+; MMR6-NEXT:    not16 $4, $17
+; MMR6-NEXT:    srl16 $16, $7, 1
+; MMR6-NEXT:    sw $16, 4($sp) # 4-byte Folded Spill
+; MMR6-NEXT:    srlv $9, $16, $4
+; MMR6-NEXT:    sllv $10, $7, $17
+; MMR6-NEXT:    andi16 $16, $2, 32
+; MMR6-NEXT:    selnez $11, $10, $16
+; MMR6-NEXT:    or $1, $1, $9
 ; MMR6-NEXT:    seleqz $1, $1, $16
-; MMR6-NEXT:    selnez $9, $8, $16
-; MMR6-NEXT:    li16 $17, 64
-; MMR6-NEXT:    subu16 $17, $17, $3
-; MMR6-NEXT:    srlv $10, $6, $17
-; MMR6-NEXT:    andi16 $2, $17, 32
-; MMR6-NEXT:    seleqz $12, $10, $2
-; MMR6-NEXT:    or $1, $9, $1
-; MMR6-NEXT:    selnez $9, $10, $2
-; MMR6-NEXT:    srlv $10, $7, $17
-; MMR6-NEXT:    not16 $17, $17
-; MMR6-NEXT:    sll16 $4, $6, 1
-; MMR6-NEXT:    sllv $4, $4, $17
-; MMR6-NEXT:    or $4, $4, $10
-; MMR6-NEXT:    seleqz $2, $4, $2
+; MMR6-NEXT:    sllv $9, $8, $17
+; MMR6-NEXT:    srl16 $2, $5, 1
+; MMR6-NEXT:    srlv $2, $2, $4
+; MMR6-NEXT:    or $1, $11, $1
+; MMR6-NEXT:    or $11, $9, $2
+; MMR6-NEXT:    srlv $9, $7, $17
+; MMR6-NEXT:    sll16 $2, $6, 1
+; MMR6-NEXT:    sllv $2, $2, $4
+; MMR6-NEXT:    seleqz $11, $11, $16
+; MMR6-NEXT:    or $9, $2, $9
+; MMR6-NEXT:    srlv $12, $6, $17
 ; MMR6-NEXT:    addiu $4, $3, -64
-; MMR6-NEXT:    or $10, $9, $2
-; MMR6-NEXT:    or $1, $1, $12
-; MMR6-NEXT:    sllv $9, $6, $4
-; MMR6-NEXT:    srl16 $2, $7, 1
-; MMR6-NEXT:    not16 $17, $4
-; MMR6-NEXT:    srlv $12, $2, $17
-; MMR6-NEXT:    or $9, $9, $12
-; MMR6-NEXT:    andi16 $17, $4, 32
-; MMR6-NEXT:    seleqz $9, $9, $17
-; MMR6-NEXT:    sllv $14, $7, $4
-; MMR6-NEXT:    selnez $12, $14, $17
+; MMR6-NEXT:    andi16 $2, $4, 63
 ; MMR6-NEXT:    sltiu $13, $3, 64
 ; MMR6-NEXT:    selnez $1, $1, $13
-; MMR6-NEXT:    or $9, $12, $9
-; MMR6-NEXT:    sllv $6, $6, $3
-; MMR6-NEXT:    lw $4, 4($sp) # 4-byte Folded Reload
-; MMR6-NEXT:    srlv $2, $2, $4
-; MMR6-NEXT:    seleqz $8, $8, $16
-; MMR6-NEXT:    li16 $4, 0
-; MMR6-NEXT:    or $8, $8, $10
-; MMR6-NEXT:    or $6, $6, $2
-; MMR6-NEXT:    seleqz $2, $9, $13
-; MMR6-NEXT:    seleqz $9, $4, $13
-; MMR6-NEXT:    sllv $7, $7, $3
-; MMR6-NEXT:    seleqz $10, $7, $16
+; MMR6-NEXT:    selnez $14, $12, $16
+; MMR6-NEXT:    seleqz $9, $9, $16
+; MMR6-NEXT:    sllv $15, $5, $17
+; MMR6-NEXT:    selnez $24, $15, $16
+; MMR6-NEXT:    seleqz $10, $10, $16
+; MMR6-NEXT:    not16 $17, $2
+; MMR6-NEXT:    or $11, $24, $11
+; MMR6-NEXT:    seleqz $12, $12, $16
+; MMR6-NEXT:    or $9, $14, $9
+; MMR6-NEXT:    seleqz $14, $15, $16
+; MMR6-NEXT:    sllv $6, $6, $2
+; MMR6-NEXT:    lw $16, 4($sp) # 4-byte Folded Reload
+; MMR6-NEXT:    srlv $15, $16, $17
+; MMR6-NEXT:    li16 $16, 0
+; MMR6-NEXT:    seleqz $24, $16, $13
+; MMR6-NEXT:    or $9, $14, $9
+; MMR6-NEXT:    or $1, $24, $1
 ; MMR6-NEXT:    selnez $10, $10, $13
-; MMR6-NEXT:    seleqz $11, $11, $3
-; MMR6-NEXT:    or $1, $1, $2
-; MMR6-NEXT:    selnez $1, $1, $3
-; MMR6-NEXT:    or $2, $11, $1
-; MMR6-NEXT:    or $1, $9, $10
-; MMR6-NEXT:    seleqz $6, $6, $16
-; MMR6-NEXT:    selnez $7, $7, $16
 ; MMR6-NEXT:    seleqz $5, $5, $3
-; MMR6-NEXT:    selnez $8, $8, $13
-; MMR6-NEXT:    seleqz $4, $14, $17
+; MMR6-NEXT:    seleqz $8, $8, $3
+; MMR6-NEXT:    or $11, $11, $12
+; MMR6-NEXT:    selnez $11, $11, $13
+; MMR6-NEXT:    or $6, $6, $15
+; MMR6-NEXT:    andi16 $4, $4, 32
+; MMR6-NEXT:    seleqz $6, $6, $4
+; MMR6-NEXT:    sllv $7, $7, $2
+; MMR6-NEXT:    selnez $2, $7, $4
+; MMR6-NEXT:    or $2, $2, $6
+; MMR6-NEXT:    seleqz $2, $2, $13
+; MMR6-NEXT:    or $2, $11, $2
+; MMR6-NEXT:    selnez $2, $2, $3
+; MMR6-NEXT:    or $2, $8, $2
+; MMR6-NEXT:    selnez $6, $9, $13
+; MMR6-NEXT:    seleqz $4, $7, $4
 ; MMR6-NEXT:    seleqz $4, $4, $13
-; MMR6-NEXT:    or $4, $8, $4
+; MMR6-NEXT:    or $4, $6, $4
 ; MMR6-NEXT:    selnez $3, $4, $3
 ; MMR6-NEXT:    or $3, $5, $3
-; MMR6-NEXT:    or $4, $7, $6
-; MMR6-NEXT:    selnez $4, $4, $13
-; MMR6-NEXT:    or $4, $9, $4
-; MMR6-NEXT:    move $5, $1
+; MMR6-NEXT:    or $5, $24, $10
+; MMR6-NEXT:    move $4, $1
 ; MMR6-NEXT:    lw $16, 8($sp) # 4-byte Folded Reload
 ; MMR6-NEXT:    lw $17, 12($sp) # 4-byte Folded Reload
 ; MMR6-NEXT:    addiu $sp, $sp, 16
diff --git a/llvm/test/CodeGen/RISCV/shifts.ll b/llvm/test/CodeGen/RISCV/shifts.ll
--- a/llvm/test/CodeGen/RISCV/shifts.ll
+++ b/llvm/test/CodeGen/RISCV/shifts.ll
@@ -145,115 +145,117 @@
 define i128 @lshr128(i128 %a, i128 %b) nounwind {
 ; RV32I-LABEL: lshr128:
 ; RV32I:       # %bb.0:
-; RV32I-NEXT:    lw a2, 0(a2)
-; RV32I-NEXT:    lw a5, 8(a1)
-; RV32I-NEXT:    lw a4, 12(a1)
-; RV32I-NEXT:    neg a6, a2
-; RV32I-NEXT:    li a3, 64
-; RV32I-NEXT:    li a7, 32
-; RV32I-NEXT:    sub t1, a7, a2
-; RV32I-NEXT:    sll t0, a5, a6
-; RV32I-NEXT:    bltz t1, .LBB6_2
+; RV32I-NEXT:    addi sp, sp, -16
+; RV32I-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
+; RV32I-NEXT:    lw a4, 0(a2)
+; RV32I-NEXT:    lw a3, 4(a1)
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    li a5, 64
+; RV32I-NEXT:    sub a6, a5, a4
+; RV32I-NEXT:    andi t0, a6, 63
+; RV32I-NEXT:    addi a7, t0, -32
+; RV32I-NEXT:    xori t0, t0, 31
+; RV32I-NEXT:    bltz a7, .LBB6_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv t2, t0
+; RV32I-NEXT:    srl t5, a3, a7
 ; RV32I-NEXT:    j .LBB6_3
 ; RV32I-NEXT:  .LBB6_2:
-; RV32I-NEXT:    sll a6, a4, a6
-; RV32I-NEXT:    sub a7, a3, a2
-; RV32I-NEXT:    xori a7, a7, 31
-; RV32I-NEXT:    srli t2, a5, 1
-; RV32I-NEXT:    srl a7, t2, a7
-; RV32I-NEXT:    or t2, a6, a7
+; RV32I-NEXT:    srl t1, a2, a6
+; RV32I-NEXT:    slli t2, a3, 1
+; RV32I-NEXT:    sll t2, t2, t0
+; RV32I-NEXT:    or t5, t1, t2
 ; RV32I-NEXT:  .LBB6_3:
-; RV32I-NEXT:    lw t5, 4(a1)
-; RV32I-NEXT:    addi a6, a2, -32
-; RV32I-NEXT:    bgez a6, .LBB6_5
+; RV32I-NEXT:    lw t1, 8(a1)
+; RV32I-NEXT:    lw a1, 12(a1)
+; RV32I-NEXT:    bgez a7, .LBB6_5
 ; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    srl a7, t5, a2
-; RV32I-NEXT:    or t2, t2, a7
+; RV32I-NEXT:    sll t2, t1, a6
+; RV32I-NEXT:    or t5, t5, t2
 ; RV32I-NEXT:  .LBB6_5:
-; RV32I-NEXT:    addi t4, a2, -96
-; RV32I-NEXT:    addi t3, a2, -64
+; RV32I-NEXT:    addi t3, a4, -64
+; RV32I-NEXT:    andi t6, t3, 63
+; RV32I-NEXT:    addi t4, t6, -32
+; RV32I-NEXT:    slli t2, a1, 1
 ; RV32I-NEXT:    bltz t4, .LBB6_7
 ; RV32I-NEXT:  # %bb.6:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    bgeu a2, a3, .LBB6_8
+; RV32I-NEXT:    srl t6, a1, t4
+; RV32I-NEXT:    bgeu a4, a5, .LBB6_8
 ; RV32I-NEXT:    j .LBB6_9
 ; RV32I-NEXT:  .LBB6_7:
-; RV32I-NEXT:    srl a7, a4, t3
-; RV32I-NEXT:    bltu a2, a3, .LBB6_9
+; RV32I-NEXT:    srl s0, t1, t3
+; RV32I-NEXT:    xori t6, t6, 31
+; RV32I-NEXT:    sll t6, t2, t6
+; RV32I-NEXT:    or t6, s0, t6
+; RV32I-NEXT:    bltu a4, a5, .LBB6_9
 ; RV32I-NEXT:  .LBB6_8:
-; RV32I-NEXT:    mv t2, a7
+; RV32I-NEXT:    mv t5, t6
 ; RV32I-NEXT:  .LBB6_9:
-; RV32I-NEXT:    mv a7, t5
-; RV32I-NEXT:    beqz a2, .LBB6_11
+; RV32I-NEXT:    bnez a4, .LBB6_12
 ; RV32I-NEXT:  # %bb.10:
-; RV32I-NEXT:    mv a7, t2
+; RV32I-NEXT:    bltz a7, .LBB6_13
 ; RV32I-NEXT:  .LBB6_11:
-; RV32I-NEXT:    lw a1, 0(a1)
-; RV32I-NEXT:    xori t2, a2, 31
-; RV32I-NEXT:    bltz a6, .LBB6_13
-; RV32I-NEXT:  # %bb.12:
-; RV32I-NEXT:    srl t5, t5, a6
-; RV32I-NEXT:    bltz t1, .LBB6_14
+; RV32I-NEXT:    sll t5, t1, a7
+; RV32I-NEXT:    bltz a7, .LBB6_14
 ; RV32I-NEXT:    j .LBB6_15
+; RV32I-NEXT:  .LBB6_12:
+; RV32I-NEXT:    mv a2, t5
+; RV32I-NEXT:    bgez a7, .LBB6_11
 ; RV32I-NEXT:  .LBB6_13:
-; RV32I-NEXT:    srl t6, a1, a2
-; RV32I-NEXT:    slli t5, t5, 1
-; RV32I-NEXT:    sll t5, t5, t2
-; RV32I-NEXT:    or t5, t6, t5
-; RV32I-NEXT:    bgez t1, .LBB6_15
+; RV32I-NEXT:    sll t5, a1, a6
+; RV32I-NEXT:    srli t6, t1, 1
+; RV32I-NEXT:    srl t6, t6, t0
+; RV32I-NEXT:    or t5, t5, t6
+; RV32I-NEXT:    bgez a7, .LBB6_15
 ; RV32I-NEXT:  .LBB6_14:
-; RV32I-NEXT:    or t5, t5, t0
+; RV32I-NEXT:    srl t6, a3, a6
+; RV32I-NEXT:    or t5, t5, t6
 ; RV32I-NEXT:  .LBB6_15:
-; RV32I-NEXT:    slli t0, a4, 1
 ; RV32I-NEXT:    bltz t4, .LBB6_17
 ; RV32I-NEXT:  # %bb.16:
-; RV32I-NEXT:    srl t1, a4, t4
-; RV32I-NEXT:    bgeu a2, a3, .LBB6_18
+; RV32I-NEXT:    li t3, 0
+; RV32I-NEXT:    bgeu a4, a5, .LBB6_18
 ; RV32I-NEXT:    j .LBB6_19
 ; RV32I-NEXT:  .LBB6_17:
-; RV32I-NEXT:    srl t1, a5, t3
-; RV32I-NEXT:    xori t3, t3, 31
-; RV32I-NEXT:    sll t3, t0, t3
-; RV32I-NEXT:    or t1, t1, t3
-; RV32I-NEXT:    bltu a2, a3, .LBB6_19
+; RV32I-NEXT:    srl t3, a1, t3
+; RV32I-NEXT:    bltu a4, a5, .LBB6_19
 ; RV32I-NEXT:  .LBB6_18:
-; RV32I-NEXT:    mv t5, t1
+; RV32I-NEXT:    mv t5, t3
 ; RV32I-NEXT:  .LBB6_19:
-; RV32I-NEXT:    bnez a2, .LBB6_22
+; RV32I-NEXT:    bnez a4, .LBB6_22
 ; RV32I-NEXT:  # %bb.20:
-; RV32I-NEXT:    bltz a6, .LBB6_23
+; RV32I-NEXT:    bltz a7, .LBB6_23
 ; RV32I-NEXT:  .LBB6_21:
-; RV32I-NEXT:    srl a5, a4, a6
-; RV32I-NEXT:    bgeu a2, a3, .LBB6_24
+; RV32I-NEXT:    srl t0, a1, a7
+; RV32I-NEXT:    bgeu a4, a5, .LBB6_24
 ; RV32I-NEXT:    j .LBB6_25
 ; RV32I-NEXT:  .LBB6_22:
-; RV32I-NEXT:    mv a1, t5
-; RV32I-NEXT:    bgez a6, .LBB6_21
+; RV32I-NEXT:    mv a3, t5
+; RV32I-NEXT:    bgez a7, .LBB6_21
 ; RV32I-NEXT:  .LBB6_23:
-; RV32I-NEXT:    srl a5, a5, a2
-; RV32I-NEXT:    sll t0, t0, t2
-; RV32I-NEXT:    or a5, a5, t0
-; RV32I-NEXT:    bltu a2, a3, .LBB6_25
+; RV32I-NEXT:    srl t1, t1, a6
+; RV32I-NEXT:    sll t0, t2, t0
+; RV32I-NEXT:    or t0, t1, t0
+; RV32I-NEXT:    bltu a4, a5, .LBB6_25
 ; RV32I-NEXT:  .LBB6_24:
-; RV32I-NEXT:    li a5, 0
+; RV32I-NEXT:    li t0, 0
 ; RV32I-NEXT:  .LBB6_25:
-; RV32I-NEXT:    bltz a6, .LBB6_27
+; RV32I-NEXT:    bltz a7, .LBB6_27
 ; RV32I-NEXT:  # %bb.26:
-; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bgeu a2, a3, .LBB6_28
+; RV32I-NEXT:    li a1, 0
+; RV32I-NEXT:    bgeu a4, a5, .LBB6_28
 ; RV32I-NEXT:    j .LBB6_29
 ; RV32I-NEXT:  .LBB6_27:
-; RV32I-NEXT:    srl a4, a4, a2
-; RV32I-NEXT:    bltu a2, a3, .LBB6_29
+; RV32I-NEXT:    srl a1, a1, a6
+; RV32I-NEXT:    bltu a4, a5, .LBB6_29
 ; RV32I-NEXT:  .LBB6_28:
-; RV32I-NEXT:    li a4, 0
+; RV32I-NEXT:    li a1, 0
 ; RV32I-NEXT:  .LBB6_29:
-; RV32I-NEXT:    sw a4, 12(a0)
-; RV32I-NEXT:    sw a5, 8(a0)
-; RV32I-NEXT:    sw a1, 0(a0)
-; RV32I-NEXT:    sw a7, 4(a0)
+; RV32I-NEXT:    sw a1, 12(a0)
+; RV32I-NEXT:    sw t0, 8(a0)
+; RV32I-NEXT:    sw a3, 4(a0)
+; RV32I-NEXT:    sw a2, 0(a0)
+; RV32I-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
+; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: lshr128:
@@ -281,116 +283,114 @@
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    addi sp, sp, -16
 ; RV32I-NEXT:    sw s0, 12(sp) # 4-byte Folded Spill
-; RV32I-NEXT:    lw a2, 0(a2)
-; RV32I-NEXT:    lw a5, 8(a1)
-; RV32I-NEXT:    lw a4, 12(a1)
-; RV32I-NEXT:    neg a6, a2
-; RV32I-NEXT:    li a3, 64
-; RV32I-NEXT:    li a7, 32
-; RV32I-NEXT:    sub t2, a7, a2
-; RV32I-NEXT:    sll t1, a5, a6
-; RV32I-NEXT:    bltz t2, .LBB7_2
+; RV32I-NEXT:    lw a4, 0(a2)
+; RV32I-NEXT:    lw a3, 4(a1)
+; RV32I-NEXT:    lw a2, 0(a1)
+; RV32I-NEXT:    li a5, 64
+; RV32I-NEXT:    sub a6, a5, a4
+; RV32I-NEXT:    andi t0, a6, 63
+; RV32I-NEXT:    addi a7, t0, -32
+; RV32I-NEXT:    xori t0, t0, 31
+; RV32I-NEXT:    bltz a7, .LBB7_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv t4, t1
+; RV32I-NEXT:    srl t4, a3, a7
 ; RV32I-NEXT:    j .LBB7_3
 ; RV32I-NEXT:  .LBB7_2:
-; RV32I-NEXT:    sll a6, a4, a6
-; RV32I-NEXT:    sub a7, a3, a2
-; RV32I-NEXT:    xori a7, a7, 31
-; RV32I-NEXT:    srli t0, a5, 1
-; RV32I-NEXT:    srl a7, t0, a7
-; RV32I-NEXT:    or t4, a6, a7
+; RV32I-NEXT:    srl t1, a2, a6
+; RV32I-NEXT:    slli t2, a3, 1
+; RV32I-NEXT:    sll t2, t2, t0
+; RV32I-NEXT:    or t4, t1, t2
 ; RV32I-NEXT:  .LBB7_3:
-; RV32I-NEXT:    lw t6, 4(a1)
-; RV32I-NEXT:    addi a6, a2, -32
-; RV32I-NEXT:    bgez a6, .LBB7_5
+; RV32I-NEXT:    lw t1, 8(a1)
+; RV32I-NEXT:    lw a1, 12(a1)
+; RV32I-NEXT:    bgez a7, .LBB7_5
 ; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    srl a7, t6, a2
-; RV32I-NEXT:    or t4, t4, a7
+; RV32I-NEXT:    sll t2, t1, a6
+; RV32I-NEXT:    or t4, t4, t2
 ; RV32I-NEXT:  .LBB7_5:
-; RV32I-NEXT:    addi t3, a2, -64
-; RV32I-NEXT:    addi t5, a2, -96
-; RV32I-NEXT:    srai a7, a4, 31
+; RV32I-NEXT:    addi t3, a4, -64
+; RV32I-NEXT:    andi t6, t3, 63
+; RV32I-NEXT:    addi t5, t6, -32
+; RV32I-NEXT:    slli t2, a1, 1
 ; RV32I-NEXT:    bltz t5, .LBB7_7
 ; RV32I-NEXT:  # %bb.6:
-; RV32I-NEXT:    mv t0, a7
-; RV32I-NEXT:    bgeu a2, a3, .LBB7_8
+; RV32I-NEXT:    sra t6, a1, t5
+; RV32I-NEXT:    bgeu a4, a5, .LBB7_8
 ; RV32I-NEXT:    j .LBB7_9
 ; RV32I-NEXT:  .LBB7_7:
-; RV32I-NEXT:    sra t0, a4, t3
-; RV32I-NEXT:    bltu a2, a3, .LBB7_9
+; RV32I-NEXT:    srl s0, t1, t3
+; RV32I-NEXT:    xori t6, t6, 31
+; RV32I-NEXT:    sll t6, t2, t6
+; RV32I-NEXT:    or t6, s0, t6
+; RV32I-NEXT:    bltu a4, a5, .LBB7_9
 ; RV32I-NEXT:  .LBB7_8:
-; RV32I-NEXT:    mv t4, t0
+; RV32I-NEXT:    mv t4, t6
 ; RV32I-NEXT:  .LBB7_9:
-; RV32I-NEXT:    mv t0, t6
-; RV32I-NEXT:    beqz a2, .LBB7_11
+; RV32I-NEXT:    bnez a4, .LBB7_12
 ; RV32I-NEXT:  # %bb.10:
-; RV32I-NEXT:    mv t0, t4
+; RV32I-NEXT:    bltz a7, .LBB7_13
 ; RV32I-NEXT:  .LBB7_11:
-; RV32I-NEXT:    lw a1, 0(a1)
-; RV32I-NEXT:    xori t4, a2, 31
-; RV32I-NEXT:    bltz a6, .LBB7_13
-; RV32I-NEXT:  # %bb.12:
-; RV32I-NEXT:    srl t6, t6, a6
-; RV32I-NEXT:    bltz t2, .LBB7_14
+; RV32I-NEXT:    sll t6, t1, a7
+; RV32I-NEXT:    bltz a7, .LBB7_14
 ; RV32I-NEXT:    j .LBB7_15
+; RV32I-NEXT:  .LBB7_12:
+; RV32I-NEXT:    mv a2, t4
+; RV32I-NEXT:    bgez a7, .LBB7_11
 ; RV32I-NEXT:  .LBB7_13:
-; RV32I-NEXT:    srl s0, a1, a2
-; RV32I-NEXT:    slli t6, t6, 1
-; RV32I-NEXT:    sll t6, t6, t4
-; RV32I-NEXT:    or t6, s0, t6
-; RV32I-NEXT:    bgez t2, .LBB7_15
+; RV32I-NEXT:    sll t4, a1, a6
+; RV32I-NEXT:    srli t6, t1, 1
+; RV32I-NEXT:    srl t6, t6, t0
+; RV32I-NEXT:    or t6, t4, t6
+; RV32I-NEXT:    bgez a7, .LBB7_15
 ; RV32I-NEXT:  .LBB7_14:
-; RV32I-NEXT:    or t6, t6, t1
+; RV32I-NEXT:    srl t4, a3, a6
+; RV32I-NEXT:    or t6, t6, t4
 ; RV32I-NEXT:  .LBB7_15:
-; RV32I-NEXT:    slli t1, a4, 1
+; RV32I-NEXT:    srai t4, a1, 31
 ; RV32I-NEXT:    bltz t5, .LBB7_17
 ; RV32I-NEXT:  # %bb.16:
-; RV32I-NEXT:    sra t2, a4, t5
-; RV32I-NEXT:    bgeu a2, a3, .LBB7_18
+; RV32I-NEXT:    mv t3, t4
+; RV32I-NEXT:    bgeu a4, a5, .LBB7_18
 ; RV32I-NEXT:    j .LBB7_19
 ; RV32I-NEXT:  .LBB7_17:
-; RV32I-NEXT:    srl t2, a5, t3
-; RV32I-NEXT:    xori t3, t3, 31
-; RV32I-NEXT:    sll t3, t1, t3
-; RV32I-NEXT:    or t2, t2, t3
-; RV32I-NEXT:    bltu a2, a3, .LBB7_19
+; RV32I-NEXT:    sra t3, a1, t3
+; RV32I-NEXT:    bltu a4, a5, .LBB7_19
 ; RV32I-NEXT:  .LBB7_18:
-; RV32I-NEXT:    mv t6, t2
+; RV32I-NEXT:    mv t6, t3
 ; RV32I-NEXT:  .LBB7_19:
-; RV32I-NEXT:    bnez a2, .LBB7_22
+; RV32I-NEXT:    bnez a4, .LBB7_22
 ; RV32I-NEXT:  # %bb.20:
-; RV32I-NEXT:    bltz a6, .LBB7_23
+; RV32I-NEXT:    bltz a7, .LBB7_23
 ; RV32I-NEXT:  .LBB7_21:
-; RV32I-NEXT:    sra a5, a4, a6
-; RV32I-NEXT:    bgeu a2, a3, .LBB7_24
+; RV32I-NEXT:    sra t0, a1, a7
+; RV32I-NEXT:    bgeu a4, a5, .LBB7_24
 ; RV32I-NEXT:    j .LBB7_25
 ; RV32I-NEXT:  .LBB7_22:
-; RV32I-NEXT:    mv a1, t6
-; RV32I-NEXT:    bgez a6, .LBB7_21
+; RV32I-NEXT:    mv a3, t6
+; RV32I-NEXT:    bgez a7, .LBB7_21
 ; RV32I-NEXT:  .LBB7_23:
-; RV32I-NEXT:    srl a5, a5, a2
-; RV32I-NEXT:    sll t1, t1, t4
-; RV32I-NEXT:    or a5, a5, t1
-; RV32I-NEXT:    bltu a2, a3, .LBB7_25
+; RV32I-NEXT:    srl t1, t1, a6
+; RV32I-NEXT:    sll t0, t2, t0
+; RV32I-NEXT:    or t0, t1, t0
+; RV32I-NEXT:    bltu a4, a5, .LBB7_25
 ; RV32I-NEXT:  .LBB7_24:
-; RV32I-NEXT:    mv a5, a7
+; RV32I-NEXT:    mv t0, t4
 ; RV32I-NEXT:  .LBB7_25:
-; RV32I-NEXT:    bltz a6, .LBB7_27
+; RV32I-NEXT:    bltz a7, .LBB7_27
 ; RV32I-NEXT:  # %bb.26:
-; RV32I-NEXT:    mv a4, a7
-; RV32I-NEXT:    bgeu a2, a3, .LBB7_28
+; RV32I-NEXT:    mv a1, t4
+; RV32I-NEXT:    bgeu a4, a5, .LBB7_28
 ; RV32I-NEXT:    j .LBB7_29
 ; RV32I-NEXT:  .LBB7_27:
-; RV32I-NEXT:    sra a4, a4, a2
-; RV32I-NEXT:    bltu a2, a3, .LBB7_29
+; RV32I-NEXT:    sra a1, a1, a6
+; RV32I-NEXT:    bltu a4, a5, .LBB7_29
 ; RV32I-NEXT:  .LBB7_28:
-; RV32I-NEXT:    mv a4, a7
+; RV32I-NEXT:    mv a1, t4
 ; RV32I-NEXT:  .LBB7_29:
-; RV32I-NEXT:    sw a4, 12(a0)
-; RV32I-NEXT:    sw a5, 8(a0)
-; RV32I-NEXT:    sw a1, 0(a0)
-; RV32I-NEXT:    sw t0, 4(a0)
+; RV32I-NEXT:    sw a1, 12(a0)
+; RV32I-NEXT:    sw t0, 8(a0)
+; RV32I-NEXT:    sw a3, 4(a0)
+; RV32I-NEXT:    sw a2, 0(a0)
 ; RV32I-NEXT:    lw s0, 12(sp) # 4-byte Folded Reload
 ; RV32I-NEXT:    addi sp, sp, 16
 ; RV32I-NEXT:    ret
@@ -419,114 +419,115 @@
 ; RV32I-LABEL: shl128:
 ; RV32I:       # %bb.0:
 ; RV32I-NEXT:    lw a2, 0(a2)
-; RV32I-NEXT:    lw a5, 4(a1)
+; RV32I-NEXT:    lw a7, 4(a1)
 ; RV32I-NEXT:    lw a4, 0(a1)
-; RV32I-NEXT:    neg a6, a2
 ; RV32I-NEXT:    li a3, 64
-; RV32I-NEXT:    li a7, 32
-; RV32I-NEXT:    sub t1, a7, a2
-; RV32I-NEXT:    srl t0, a5, a6
-; RV32I-NEXT:    bltz t1, .LBB8_2
+; RV32I-NEXT:    sub a5, a3, a2
+; RV32I-NEXT:    andi t0, a5, 63
+; RV32I-NEXT:    addi a6, t0, -32
+; RV32I-NEXT:    xori t0, t0, 31
+; RV32I-NEXT:    bltz a6, .LBB8_2
 ; RV32I-NEXT:  # %bb.1:
-; RV32I-NEXT:    mv t2, t0
-; RV32I-NEXT:    j .LBB8_3
+; RV32I-NEXT:    srl t6, a7, a6
+; RV32I-NEXT:    lw t5, 8(a1)
+; RV32I-NEXT:    bltz a6, .LBB8_3
+; RV32I-NEXT:    j .LBB8_4
 ; RV32I-NEXT:  .LBB8_2:
-; RV32I-NEXT:    srl a6, a4, a6
-; RV32I-NEXT:    sub a7, a3, a2
-; RV32I-NEXT:    xori a7, a7, 31
-; RV32I-NEXT:    slli t2, a5, 1
-; RV32I-NEXT:    sll a7, t2, a7
-; RV32I-NEXT:    or t2, a6, a7
-; RV32I-NEXT:  .LBB8_3:
+; RV32I-NEXT:    srl t1, a4, a5
+; RV32I-NEXT:    slli t2, a7, 1
+; RV32I-NEXT:    sll t2, t2, t0
+; RV32I-NEXT:    or t6, t1, t2
 ; RV32I-NEXT:    lw t5, 8(a1)
-; RV32I-NEXT:    addi a6, a2, -32
-; RV32I-NEXT:    bgez a6, .LBB8_5
-; RV32I-NEXT:  # %bb.4:
-; RV32I-NEXT:    sll a7, t5, a2
-; RV32I-NEXT:    or t2, t2, a7
-; RV32I-NEXT:  .LBB8_5:
-; RV32I-NEXT:    addi t4, a2, -96
-; RV32I-NEXT:    addi t3, a2, -64
-; RV32I-NEXT:    bltz t4, .LBB8_7
-; RV32I-NEXT:  # %bb.6:
-; RV32I-NEXT:    li a7, 0
-; RV32I-NEXT:    bgeu a2, a3, .LBB8_8
-; RV32I-NEXT:    j .LBB8_9
+; RV32I-NEXT:    bgez a6, .LBB8_4
+; RV32I-NEXT:  .LBB8_3:
+; RV32I-NEXT:    sll t1, t5, a5
+; RV32I-NEXT:    or t6, t6, t1
+; RV32I-NEXT:  .LBB8_4:
+; RV32I-NEXT:    addi t2, a2, -64
+; RV32I-NEXT:    andi t3, t2, 63
+; RV32I-NEXT:    addi t4, t3, -32
+; RV32I-NEXT:    bltz t4, .LBB8_6
+; RV32I-NEXT:  # %bb.5:
+; RV32I-NEXT:    li t1, 0
+; RV32I-NEXT:    bgeu a2, a3, .LBB8_7
+; RV32I-NEXT:    j .LBB8_8
+; RV32I-NEXT:  .LBB8_6:
+; RV32I-NEXT:    sll t1, a4, t2
+; RV32I-NEXT:    bltu a2, a3, .LBB8_8
 ; RV32I-NEXT:  .LBB8_7:
-; RV32I-NEXT:    sll a7, a4, t3
-; RV32I-NEXT:    bltu a2, a3, .LBB8_9
+; RV32I-NEXT:    mv t6, t1
 ; RV32I-NEXT:  .LBB8_8:
-; RV32I-NEXT:    mv t2, a7
-; RV32I-NEXT:  .LBB8_9:
-; RV32I-NEXT:    mv a7, t5
-; RV32I-NEXT:    beqz a2, .LBB8_11
-; RV32I-NEXT:  # %bb.10:
-; RV32I-NEXT:    mv a7, t2
-; RV32I-NEXT:  .LBB8_11:
+; RV32I-NEXT:    mv t1, t5
+; RV32I-NEXT:    bnez a2, .LBB8_11
+; RV32I-NEXT:  # %bb.9:
 ; RV32I-NEXT:    lw a1, 12(a1)
-; RV32I-NEXT:    xori t2, a2, 31
-; RV32I-NEXT:    bltz a6, .LBB8_13
-; RV32I-NEXT:  # %bb.12:
+; RV32I-NEXT:    bltz a6, .LBB8_12
+; RV32I-NEXT:  .LBB8_10:
 ; RV32I-NEXT:    sll t5, t5, a6
-; RV32I-NEXT:    bltz t1, .LBB8_14
-; RV32I-NEXT:    j .LBB8_15
-; RV32I-NEXT:  .LBB8_13:
-; RV32I-NEXT:    sll t6, a1, a2
+; RV32I-NEXT:    bltz a6, .LBB8_13
+; RV32I-NEXT:    j .LBB8_14
+; RV32I-NEXT:  .LBB8_11:
+; RV32I-NEXT:    mv t1, t6
+; RV32I-NEXT:    lw a1, 12(a1)
+; RV32I-NEXT:    bgez a6, .LBB8_10
+; RV32I-NEXT:  .LBB8_12:
+; RV32I-NEXT:    sll t6, a1, a5
 ; RV32I-NEXT:    srli t5, t5, 1
-; RV32I-NEXT:    srl t5, t5, t2
+; RV32I-NEXT:    srl t5, t5, t0
 ; RV32I-NEXT:    or t5, t6, t5
-; RV32I-NEXT:    bgez t1, .LBB8_15
+; RV32I-NEXT:    bgez a6, .LBB8_14
+; RV32I-NEXT:  .LBB8_13:
+; RV32I-NEXT:    srl t6, a7, a5
+; RV32I-NEXT:    or t5, t5, t6
 ; RV32I-NEXT:  .LBB8_14:
-; RV32I-NEXT:    or t5, t5, t0
-; RV32I-NEXT:  .LBB8_15:
-; RV32I-NEXT:    srli t0, a4, 1
-; RV32I-NEXT:    bltz t4, .LBB8_17
-; RV32I-NEXT:  # %bb.16:
-; RV32I-NEXT:    sll t1, a4, t4
-; RV32I-NEXT:    bgeu a2, a3, .LBB8_18
-; RV32I-NEXT:    j .LBB8_19
-; RV32I-NEXT:  .LBB8_17:
-; RV32I-NEXT:    sll t1, a5, t3
+; RV32I-NEXT:    srli t6, a4, 1
+; RV32I-NEXT:    bltz t4, .LBB8_16
+; RV32I-NEXT:  # %bb.15:
+; RV32I-NEXT:    sll t2, a4, t4
+; RV32I-NEXT:    bgeu a2, a3, .LBB8_17
+; RV32I-NEXT:    j .LBB8_18
+; RV32I-NEXT:  .LBB8_16:
+; RV32I-NEXT:    sll t2, a7, t2
 ; RV32I-NEXT:    xori t3, t3, 31
-; RV32I-NEXT:    srl t3, t0, t3
-; RV32I-NEXT:    or t1, t1, t3
-; RV32I-NEXT:    bltu a2, a3, .LBB8_19
+; RV32I-NEXT:    srl t3, t6, t3
+; RV32I-NEXT:    or t2, t2, t3
+; RV32I-NEXT:    bltu a2, a3, .LBB8_18
+; RV32I-NEXT:  .LBB8_17:
+; RV32I-NEXT:    mv t5, t2
 ; RV32I-NEXT:  .LBB8_18:
-; RV32I-NEXT:    mv t5, t1
-; RV32I-NEXT:  .LBB8_19:
-; RV32I-NEXT:    bnez a2, .LBB8_22
-; RV32I-NEXT:  # %bb.20:
-; RV32I-NEXT:    bltz a6, .LBB8_23
+; RV32I-NEXT:    bnez a2, .LBB8_21
+; RV32I-NEXT:  # %bb.19:
+; RV32I-NEXT:    bltz a6, .LBB8_22
+; RV32I-NEXT:  .LBB8_20:
+; RV32I-NEXT:    sll a7, a4, a6
+; RV32I-NEXT:    bgeu a2, a3, .LBB8_23
+; RV32I-NEXT:    j .LBB8_24
 ; RV32I-NEXT:  .LBB8_21:
-; RV32I-NEXT:    sll a5, a4, a6
-; RV32I-NEXT:    bgeu a2, a3, .LBB8_24
-; RV32I-NEXT:    j .LBB8_25
-; RV32I-NEXT:  .LBB8_22:
 ; RV32I-NEXT:    mv a1, t5
-; RV32I-NEXT:    bgez a6, .LBB8_21
+; RV32I-NEXT:    bgez a6, .LBB8_20
+; RV32I-NEXT:  .LBB8_22:
+; RV32I-NEXT:    sll a7, a7, a5
+; RV32I-NEXT:    srl t0, t6, t0
+; RV32I-NEXT:    or a7, a7, t0
+; RV32I-NEXT:    bltu a2, a3, .LBB8_24
 ; RV32I-NEXT:  .LBB8_23:
-; RV32I-NEXT:    sll a5, a5, a2
-; RV32I-NEXT:    srl t0, t0, t2
-; RV32I-NEXT:    or a5, a5, t0
-; RV32I-NEXT:    bltu a2, a3, .LBB8_25
+; RV32I-NEXT:    li a7, 0
 ; RV32I-NEXT:  .LBB8_24:
-; RV32I-NEXT:    li a5, 0
-; RV32I-NEXT:  .LBB8_25:
-; RV32I-NEXT:    bltz a6, .LBB8_27
-; RV32I-NEXT:  # %bb.26:
+; RV32I-NEXT:    bltz a6, .LBB8_26
+; RV32I-NEXT:  # %bb.25:
 ; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:    bgeu a2, a3, .LBB8_28
-; RV32I-NEXT:    j .LBB8_29
+; RV32I-NEXT:    bgeu a2, a3, .LBB8_27
+; RV32I-NEXT:    j .LBB8_28
+; RV32I-NEXT:  .LBB8_26:
+; RV32I-NEXT:    sll a4, a4, a5
+; RV32I-NEXT:    bltu a2, a3, .LBB8_28
 ; RV32I-NEXT:  .LBB8_27:
-; RV32I-NEXT:    sll a4, a4, a2
-; RV32I-NEXT:    bltu a2, a3, .LBB8_29
-; RV32I-NEXT:  .LBB8_28:
 ; RV32I-NEXT:    li a4, 0
-; RV32I-NEXT:  .LBB8_29:
+; RV32I-NEXT:  .LBB8_28:
 ; RV32I-NEXT:    sw a4, 0(a0)
-; RV32I-NEXT:    sw a5, 4(a0)
+; RV32I-NEXT:    sw a7, 4(a0)
 ; RV32I-NEXT:    sw a1, 12(a0)
-; RV32I-NEXT:    sw a7, 8(a0)
+; RV32I-NEXT:    sw t1, 8(a0)
 ; RV32I-NEXT:    ret
 ;
 ; RV64I-LABEL: shl128:
diff --git a/llvm/test/CodeGen/SystemZ/shift-12.ll b/llvm/test/CodeGen/SystemZ/shift-12.ll
--- a/llvm/test/CodeGen/SystemZ/shift-12.ll
+++ b/llvm/test/CodeGen/SystemZ/shift-12.ll
@@ -125,21 +125,21 @@
 ; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
 ; CHECK-NEXT:    .cfi_offset %r14, -48
 ; CHECK-NEXT:    .cfi_offset %r15, -40
-; CHECK-NEXT:    lg %r0, 8(%r3)
-; CHECK-NEXT:    lg %r1, 0(%r3)
-; CHECK-NEXT:    risblg %r3, %r4, 25, 159, 0
-; CHECK-NEXT:    lcr %r14, %r3
-; CHECK-NEXT:    sllg %r5, %r1, 0(%r4)
-; CHECK-NEXT:    srlg %r14, %r0, 0(%r14)
+; CHECK-NEXT:    lg %r0, 0(%r3)
+; CHECK-NEXT:    lg %r1, 8(%r3)
+; CHECK-NEXT:    lhi %r3, 64
+; CHECK-NEXT:    sr %r3, %r4
+; CHECK-NEXT:    srlg %r5, %r1, 0(%r3)
+; CHECK-NEXT:    sllg %r14, %r0, 0(%r3)
 ; CHECK-NEXT:    ogr %r5, %r14
-; CHECK-NEXT:    sllg %r3, %r0, -64(%r3)
+; CHECK-NEXT:    sllg %r14, %r1, -64(%r4)
 ; CHECK-NEXT:    tmll %r4, 127
-; CHECK-NEXT:    locgrle %r3, %r5
-; CHECK-NEXT:    sllg %r0, %r0, 0(%r4)
-; CHECK-NEXT:    locgre %r3, %r1
+; CHECK-NEXT:    locgrle %r14, %r5
+; CHECK-NEXT:    locgre %r14, %r0
+; CHECK-NEXT:    sllg %r0, %r1, 0(%r3)
 ; CHECK-NEXT:    locghinle %r0, 0
 ; CHECK-NEXT:    stg %r0, 8(%r2)
-; CHECK-NEXT:    stg %r3, 0(%r2)
+; CHECK-NEXT:    stg %r14, 0(%r2)
 ; CHECK-NEXT:    lmg %r14, %r15, 112(%r15)
 ; CHECK-NEXT:    br %r14
   %and = and i32 %sh, 127
@@ -154,21 +154,21 @@
 ; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
 ; CHECK-NEXT:    .cfi_offset %r14, -48
 ; CHECK-NEXT:    .cfi_offset %r15, -40
-; CHECK-NEXT:    lg %r0, 0(%r3)
-; CHECK-NEXT:    lg %r1, 8(%r3)
-; CHECK-NEXT:    risblg %r3, %r4, 25, 159, 0
-; CHECK-NEXT:    lcr %r14, %r3
-; CHECK-NEXT:    srlg %r5, %r1, 0(%r4)
-; CHECK-NEXT:    sllg %r14, %r0, 0(%r14)
+; CHECK-NEXT:    lg %r0, 8(%r3)
+; CHECK-NEXT:    lg %r1, 0(%r3)
+; CHECK-NEXT:    lhi %r3, 64
+; CHECK-NEXT:    sr %r3, %r4
+; CHECK-NEXT:    sllg %r5, %r1, 0(%r3)
+; CHECK-NEXT:    srlg %r14, %r0, 0(%r3)
 ; CHECK-NEXT:    ogr %r5, %r14
-; CHECK-NEXT:    srlg %r3, %r0, -64(%r3)
+; CHECK-NEXT:    srlg %r14, %r1, -64(%r4)
 ; CHECK-NEXT:    tmll %r4, 127
-; CHECK-NEXT:    locgrle %r3, %r5
-; CHECK-NEXT:    srlg %r0, %r0, 0(%r4)
-; CHECK-NEXT:    locgre %r3, %r1
+; CHECK-NEXT:    locgrle %r14, %r5
+; CHECK-NEXT:    locgre %r14, %r0
+; CHECK-NEXT:    srlg %r0, %r1, 0(%r3)
 ; CHECK-NEXT:    locghinle %r0, 0
 ; CHECK-NEXT:    stg %r0, 0(%r2)
-; CHECK-NEXT:    stg %r3, 8(%r2)
+; CHECK-NEXT:    stg %r14, 8(%r2)
 ; CHECK-NEXT:    lmg %r14, %r15, 112(%r15)
 ; CHECK-NEXT:    br %r14
   %and = and i32 %sh, 127
@@ -183,22 +183,22 @@
 ; CHECK-NEXT:    stmg %r14, %r15, 112(%r15)
 ; CHECK-NEXT:    .cfi_offset %r14, -48
 ; CHECK-NEXT:    .cfi_offset %r15, -40
-; CHECK-NEXT:    lg %r0, 0(%r3)
-; CHECK-NEXT:    lg %r1, 8(%r3)
-; CHECK-NEXT:    risblg %r3, %r4, 25, 159, 0
-; CHECK-NEXT:    lcr %r14, %r3
-; CHECK-NEXT:    srlg %r5, %r1, 0(%r4)
-; CHECK-NEXT:    sllg %r14, %r0, 0(%r14)
+; CHECK-NEXT:    lg %r0, 8(%r3)
+; CHECK-NEXT:    lg %r1, 0(%r3)
+; CHECK-NEXT:    lhi %r3, 64
+; CHECK-NEXT:    sr %r3, %r4
+; CHECK-NEXT:    sllg %r5, %r1, 0(%r3)
+; CHECK-NEXT:    srlg %r14, %r0, 0(%r3)
 ; CHECK-NEXT:    ogr %r5, %r14
-; CHECK-NEXT:    srag %r14, %r0, 0(%r4)
-; CHECK-NEXT:    srag %r3, %r0, -64(%r3)
-; CHECK-NEXT:    srag %r0, %r0, 63
+; CHECK-NEXT:    srag %r3, %r1, 0(%r3)
+; CHECK-NEXT:    srag %r14, %r1, -64(%r4)
+; CHECK-NEXT:    srag %r1, %r1, 63
 ; CHECK-NEXT:    tmll %r4, 127
-; CHECK-NEXT:    locgrle %r3, %r5
-; CHECK-NEXT:    locgre %r3, %r1
-; CHECK-NEXT:    locgrle %r0, %r14
-; CHECK-NEXT:    stg %r0, 0(%r2)
-; CHECK-NEXT:    stg %r3, 8(%r2)
+; CHECK-NEXT:    locgrle %r14, %r5
+; CHECK-NEXT:    locgre %r14, %r0
+; CHECK-NEXT:    locgrle %r1, %r3
+; CHECK-NEXT:    stg %r1, 0(%r2)
+; CHECK-NEXT:    stg %r14, 8(%r2)
 ; CHECK-NEXT:    lmg %r14, %r15, 112(%r15)
 ; CHECK-NEXT:    br %r14
   %and = and i32 %sh, 127
diff --git a/llvm/test/CodeGen/X86/scheduler-backtracking.ll b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
--- a/llvm/test/CodeGen/X86/scheduler-backtracking.ll
+++ b/llvm/test/CodeGen/X86/scheduler-backtracking.ll
@@ -15,41 +15,39 @@
 ; ILP-NEXT:    pushq %r14
 ; ILP-NEXT:    pushq %rbx
 ; ILP-NEXT:    movq %rdi, %rax
-; ILP-NEXT:    xorl %r8d, %r8d
 ; ILP-NEXT:    addl %esi, %esi
-; ILP-NEXT:    leal 3(%rsi), %r9d
-; ILP-NEXT:    movl $1, %r11d
-; ILP-NEXT:    xorl %r14d, %r14d
-; ILP-NEXT:    movl %r9d, %ecx
-; ILP-NEXT:    shldq %cl, %r11, %r14
+; ILP-NEXT:    movb $125, %r10b
+; ILP-NEXT:    subb %sil, %r10b
+; ILP-NEXT:    xorl %r8d, %r8d
+; ILP-NEXT:    leal -125(%rsi), %r9d
 ; ILP-NEXT:    movl $1, %edx
-; ILP-NEXT:    shlq %cl, %rdx
-; ILP-NEXT:    leal -125(%rsi), %r10d
-; ILP-NEXT:    xorl %ebx, %ebx
-; ILP-NEXT:    movl %r10d, %ecx
-; ILP-NEXT:    shldq %cl, %r11, %rbx
-; ILP-NEXT:    testb $64, %r9b
-; ILP-NEXT:    cmovneq %rdx, %r14
-; ILP-NEXT:    cmovneq %r8, %rdx
+; ILP-NEXT:    xorl %r11d, %r11d
+; ILP-NEXT:    movl %r9d, %ecx
+; ILP-NEXT:    shldq %cl, %rdx, %r11
 ; ILP-NEXT:    movl $1, %edi
 ; ILP-NEXT:    shlq %cl, %rdi
-; ILP-NEXT:    movb $125, %cl
-; ILP-NEXT:    subb %sil, %cl
-; ILP-NEXT:    shrdq %cl, %r8, %r11
-; ILP-NEXT:    testb $64, %cl
-; ILP-NEXT:    cmovneq %r8, %r11
+; ILP-NEXT:    movl $1, %r14d
+; ILP-NEXT:    movl %r10d, %ecx
+; ILP-NEXT:    shrdq %cl, %r8, %r14
+; ILP-NEXT:    xorl %ebx, %ebx
+; ILP-NEXT:    shldq %cl, %rdx, %rbx
+; ILP-NEXT:    shlq %cl, %rdx
 ; ILP-NEXT:    testb $64, %r10b
-; ILP-NEXT:    cmovneq %rdi, %rbx
+; ILP-NEXT:    cmovneq %rdx, %rbx
+; ILP-NEXT:    cmovneq %r8, %rdx
+; ILP-NEXT:    cmovneq %r8, %r14
+; ILP-NEXT:    testb $64, %r9b
+; ILP-NEXT:    cmovneq %rdi, %r11
 ; ILP-NEXT:    cmovneq %r8, %rdi
-; ILP-NEXT:    testb %r9b, %r9b
-; ILP-NEXT:    cmovsq %r8, %r14
+; ILP-NEXT:    addb $3, %sil
+; ILP-NEXT:    cmovsq %r8, %rbx
 ; ILP-NEXT:    cmovsq %r8, %rdx
-; ILP-NEXT:    movq %r14, 8(%rax)
+; ILP-NEXT:    movq %rbx, 8(%rax)
 ; ILP-NEXT:    movq %rdx, (%rax)
-; ILP-NEXT:    cmovnsq %r8, %rbx
-; ILP-NEXT:    cmoveq %r8, %rbx
-; ILP-NEXT:    movq %rbx, 24(%rax)
-; ILP-NEXT:    cmovnsq %r11, %rdi
+; ILP-NEXT:    cmovnsq %r8, %r11
+; ILP-NEXT:    cmoveq %r8, %r11
+; ILP-NEXT:    movq %r11, 24(%rax)
+; ILP-NEXT:    cmovnsq %r14, %rdi
 ; ILP-NEXT:    cmoveq %r8, %rdi
 ; ILP-NEXT:    movq %rdi, 16(%rax)
 ; ILP-NEXT:    popq %rbx
@@ -58,7 +56,6 @@
 ;
 ; HYBRID-LABEL: test1:
 ; HYBRID:       # %bb.0:
-; HYBRID-NEXT:    pushq %rbx
 ; HYBRID-NEXT:    movq %rdi, %rax
 ; HYBRID-NEXT:    addl %esi, %esi
 ; HYBRID-NEXT:    movb $125, %cl
@@ -67,43 +64,36 @@
 ; HYBRID-NEXT:    movl $1, %edi
 ; HYBRID-NEXT:    movl $1, %r9d
 ; HYBRID-NEXT:    shrdq %cl, %r8, %r9
+; HYBRID-NEXT:    xorl %r10d, %r10d
+; HYBRID-NEXT:    shldq %cl, %rdi, %r10
+; HYBRID-NEXT:    movl $1, %r11d
+; HYBRID-NEXT:    shlq %cl, %r11
 ; HYBRID-NEXT:    testb $64, %cl
+; HYBRID-NEXT:    cmovneq %r11, %r10
 ; HYBRID-NEXT:    cmovneq %r8, %r9
-; HYBRID-NEXT:    leal 3(%rsi), %r10d
-; HYBRID-NEXT:    xorl %r11d, %r11d
-; HYBRID-NEXT:    movl %r10d, %ecx
-; HYBRID-NEXT:    shldq %cl, %rdi, %r11
-; HYBRID-NEXT:    addb $-125, %sil
-; HYBRID-NEXT:    xorl %ebx, %ebx
-; HYBRID-NEXT:    movl %esi, %ecx
-; HYBRID-NEXT:    shldq %cl, %rdi, %rbx
-; HYBRID-NEXT:    movl $1, %edx
-; HYBRID-NEXT:    shlq %cl, %rdx
-; HYBRID-NEXT:    testb $64, %sil
-; HYBRID-NEXT:    cmovneq %rdx, %rbx
-; HYBRID-NEXT:    cmovneq %r8, %rdx
-; HYBRID-NEXT:    movl %r10d, %ecx
+; HYBRID-NEXT:    cmovneq %r8, %r11
+; HYBRID-NEXT:    leal -125(%rsi), %ecx
+; HYBRID-NEXT:    xorl %edx, %edx
+; HYBRID-NEXT:    shldq %cl, %rdi, %rdx
 ; HYBRID-NEXT:    shlq %cl, %rdi
-; HYBRID-NEXT:    testb $64, %r10b
-; HYBRID-NEXT:    cmovneq %rdi, %r11
+; HYBRID-NEXT:    testb $64, %cl
+; HYBRID-NEXT:    cmovneq %rdi, %rdx
 ; HYBRID-NEXT:    cmovneq %r8, %rdi
-; HYBRID-NEXT:    testb %r10b, %r10b
+; HYBRID-NEXT:    addb $3, %sil
+; HYBRID-NEXT:    cmovsq %r8, %r10
+; HYBRID-NEXT:    movq %r10, 8(%rax)
 ; HYBRID-NEXT:    cmovsq %r8, %r11
-; HYBRID-NEXT:    movq %r11, 8(%rax)
-; HYBRID-NEXT:    cmovsq %r8, %rdi
-; HYBRID-NEXT:    movq %rdi, (%rax)
-; HYBRID-NEXT:    cmovnsq %r8, %rbx
-; HYBRID-NEXT:    cmoveq %r8, %rbx
-; HYBRID-NEXT:    movq %rbx, 24(%rax)
-; HYBRID-NEXT:    cmovnsq %r9, %rdx
+; HYBRID-NEXT:    movq %r11, (%rax)
+; HYBRID-NEXT:    cmovnsq %r8, %rdx
 ; HYBRID-NEXT:    cmoveq %r8, %rdx
-; HYBRID-NEXT:    movq %rdx, 16(%rax)
-; HYBRID-NEXT:    popq %rbx
+; HYBRID-NEXT:    movq %rdx, 24(%rax)
+; HYBRID-NEXT:    cmovnsq %r9, %rdi
+; HYBRID-NEXT:    cmoveq %r8, %rdi
+; HYBRID-NEXT:    movq %rdi, 16(%rax)
 ; HYBRID-NEXT:    retq
 ;
 ; BURR-LABEL: test1:
 ; BURR:       # %bb.0:
-; BURR-NEXT:    pushq %rbx
 ; BURR-NEXT:    movq %rdi, %rax
 ; BURR-NEXT:    addl %esi, %esi
 ; BURR-NEXT:    movb $125, %cl
@@ -112,129 +102,117 @@
 ; BURR-NEXT:    movl $1, %edi
 ; BURR-NEXT:    movl $1, %r9d
 ; BURR-NEXT:    shrdq %cl, %r8, %r9
+; BURR-NEXT:    xorl %r10d, %r10d
+; BURR-NEXT:    shldq %cl, %rdi, %r10
+; BURR-NEXT:    movl $1, %r11d
+; BURR-NEXT:    shlq %cl, %r11
 ; BURR-NEXT:    testb $64, %cl
+; BURR-NEXT:    cmovneq %r11, %r10
 ; BURR-NEXT:    cmovneq %r8, %r9
-; BURR-NEXT:    leal 3(%rsi), %r10d
-; BURR-NEXT:    xorl %r11d, %r11d
-; BURR-NEXT:    movl %r10d, %ecx
-; BURR-NEXT:    shldq %cl, %rdi, %r11
-; BURR-NEXT:    addb $-125, %sil
-; BURR-NEXT:    xorl %ebx, %ebx
-; BURR-NEXT:    movl %esi, %ecx
-; BURR-NEXT:    shldq %cl, %rdi, %rbx
-; BURR-NEXT:    movl $1, %edx
-; BURR-NEXT:    shlq %cl, %rdx
-; BURR-NEXT:    testb $64, %sil
-; BURR-NEXT:    cmovneq %rdx, %rbx
-; BURR-NEXT:    cmovneq %r8, %rdx
-; BURR-NEXT:    movl %r10d, %ecx
+; BURR-NEXT:    cmovneq %r8, %r11
+; BURR-NEXT:    leal -125(%rsi), %ecx
+; BURR-NEXT:    xorl %edx, %edx
+; BURR-NEXT:    shldq %cl, %rdi, %rdx
 ; BURR-NEXT:    shlq %cl, %rdi
-; BURR-NEXT:    testb $64, %r10b
-; BURR-NEXT:    cmovneq %rdi, %r11
+; BURR-NEXT:    testb $64, %cl
+; BURR-NEXT:    cmovneq %rdi, %rdx
 ; BURR-NEXT:    cmovneq %r8, %rdi
-; BURR-NEXT:    testb %r10b, %r10b
+; BURR-NEXT:    addb $3, %sil
+; BURR-NEXT:    cmovsq %r8, %r10
+; BURR-NEXT:    movq %r10, 8(%rax)
 ; BURR-NEXT:    cmovsq %r8, %r11
-; BURR-NEXT:    movq %r11, 8(%rax)
-; BURR-NEXT:    cmovsq %r8, %rdi
-; BURR-NEXT:    movq %rdi, (%rax)
-; BURR-NEXT:    cmovnsq %r8, %rbx
-; BURR-NEXT:    cmoveq %r8, %rbx
-; BURR-NEXT:    movq %rbx, 24(%rax)
-; BURR-NEXT:    cmovnsq %r9, %rdx
+; BURR-NEXT:    movq %r11, (%rax)
+; BURR-NEXT:    cmovnsq %r8, %rdx
 ; BURR-NEXT:    cmoveq %r8, %rdx
-; BURR-NEXT:    movq %rdx, 16(%rax)
-; BURR-NEXT:    popq %rbx
+; BURR-NEXT:    movq %rdx, 24(%rax)
+; BURR-NEXT:    cmovnsq %r9, %rdi
+; BURR-NEXT:    cmoveq %r8, %rdi
+; BURR-NEXT:    movq %rdi, 16(%rax)
 ; BURR-NEXT:    retq
 ;
 ; SRC-LABEL: test1:
 ; SRC:       # %bb.0:
-; SRC-NEXT:    pushq %rbx
 ; SRC-NEXT:    movq %rdi, %rax
 ; SRC-NEXT:    addl %esi, %esi
-; SRC-NEXT:    leal 3(%rsi), %r9d
 ; SRC-NEXT:    movb $125, %cl
 ; SRC-NEXT:    subb %sil, %cl
 ; SRC-NEXT:    xorl %r8d, %r8d
-; SRC-NEXT:    movl $1, %edi
+; SRC-NEXT:    movl $1, %edx
 ; SRC-NEXT:    movl $1, %r10d
 ; SRC-NEXT:    shrdq %cl, %r8, %r10
+; SRC-NEXT:    xorl %r9d, %r9d
+; SRC-NEXT:    shldq %cl, %rdx, %r9
+; SRC-NEXT:    movl $1, %r11d
+; SRC-NEXT:    shlq %cl, %r11
 ; SRC-NEXT:    testb $64, %cl
+; SRC-NEXT:    cmovneq %r11, %r9
+; SRC-NEXT:    cmovneq %r8, %r11
 ; SRC-NEXT:    cmovneq %r8, %r10
-; SRC-NEXT:    addb $-125, %sil
-; SRC-NEXT:    xorl %edx, %edx
-; SRC-NEXT:    movl %esi, %ecx
-; SRC-NEXT:    shldq %cl, %rdi, %rdx
-; SRC-NEXT:    xorl %r11d, %r11d
-; SRC-NEXT:    movl %r9d, %ecx
-; SRC-NEXT:    shldq %cl, %rdi, %r11
-; SRC-NEXT:    movl $1, %ebx
-; SRC-NEXT:    shlq %cl, %rbx
-; SRC-NEXT:    testb $64, %r9b
-; SRC-NEXT:    cmovneq %rbx, %r11
-; SRC-NEXT:    cmovneq %r8, %rbx
-; SRC-NEXT:    movl %esi, %ecx
-; SRC-NEXT:    shlq %cl, %rdi
-; SRC-NEXT:    testb $64, %sil
-; SRC-NEXT:    cmovneq %rdi, %rdx
-; SRC-NEXT:    cmovneq %r8, %rdi
-; SRC-NEXT:    testb %r9b, %r9b
-; SRC-NEXT:    cmovnsq %r10, %rdi
-; SRC-NEXT:    cmoveq %r8, %rdi
-; SRC-NEXT:    cmovnsq %r8, %rdx
+; SRC-NEXT:    leal -125(%rsi), %ecx
+; SRC-NEXT:    xorl %edi, %edi
+; SRC-NEXT:    shldq %cl, %rdx, %rdi
+; SRC-NEXT:    shlq %cl, %rdx
+; SRC-NEXT:    testb $64, %cl
+; SRC-NEXT:    cmovneq %rdx, %rdi
+; SRC-NEXT:    cmovneq %r8, %rdx
+; SRC-NEXT:    addb $3, %sil
+; SRC-NEXT:    cmovsq %r8, %r9
+; SRC-NEXT:    cmovnsq %r10, %rdx
 ; SRC-NEXT:    cmoveq %r8, %rdx
+; SRC-NEXT:    cmovnsq %r8, %rdi
+; SRC-NEXT:    cmoveq %r8, %rdi
 ; SRC-NEXT:    cmovsq %r8, %r11
-; SRC-NEXT:    cmovsq %r8, %rbx
-; SRC-NEXT:    movq %r11, 8(%rax)
-; SRC-NEXT:    movq %rbx, (%rax)
-; SRC-NEXT:    movq %rdx, 24(%rax)
-; SRC-NEXT:    movq %rdi, 16(%rax)
-; SRC-NEXT:    popq %rbx
+; SRC-NEXT:    movq %r9, 8(%rax)
+; SRC-NEXT:    movq %r11, (%rax)
+; SRC-NEXT:    movq %rdi, 24(%rax)
+; SRC-NEXT:    movq %rdx, 16(%rax)
 ; SRC-NEXT:    retq
 ;
 ; LIN-LABEL: test1:
 ; LIN:       # %bb.0:
 ; LIN-NEXT:    movq %rdi, %rax
-; LIN-NEXT:    xorl %r9d, %r9d
-; LIN-NEXT:    movl $1, %r8d
-; LIN-NEXT:    addl %esi, %esi
-; LIN-NEXT:    leal 3(%rsi), %ecx
-; LIN-NEXT:    movl $1, %edi
-; LIN-NEXT:    shlq %cl, %rdi
-; LIN-NEXT:    testb $64, %cl
-; LIN-NEXT:    movq %rdi, %rdx
-; LIN-NEXT:    cmovneq %r9, %rdx
-; LIN-NEXT:    testb %cl, %cl
-; LIN-NEXT:    cmovsq %r9, %rdx
-; LIN-NEXT:    movq %rdx, (%rax)
+; LIN-NEXT:    xorl %r8d, %r8d
+; LIN-NEXT:    movl $1, %r9d
+; LIN-NEXT:    movb $125, %r10b
+; LIN-NEXT:    leal (%rsi,%rsi), %edi
+; LIN-NEXT:    subb %dil, %r10b
+; LIN-NEXT:    movl $1, %esi
+; LIN-NEXT:    movl %r10d, %ecx
+; LIN-NEXT:    shlq %cl, %rsi
+; LIN-NEXT:    testb $64, %r10b
+; LIN-NEXT:    movq %rsi, %rcx
+; LIN-NEXT:    cmovneq %r8, %rcx
+; LIN-NEXT:    movl %edi, %edx
+; LIN-NEXT:    addb $3, %dl
+; LIN-NEXT:    cmovsq %r8, %rcx
+; LIN-NEXT:    movq %rcx, (%rax)
 ; LIN-NEXT:    xorl %edx, %edx
-; LIN-NEXT:    # kill: def $cl killed $cl killed $ecx
-; LIN-NEXT:    shldq %cl, %r8, %rdx
-; LIN-NEXT:    cmovneq %rdi, %rdx
-; LIN-NEXT:    cmovsq %r9, %rdx
+; LIN-NEXT:    movl %r10d, %ecx
+; LIN-NEXT:    shldq %cl, %r9, %rdx
+; LIN-NEXT:    cmovneq %rsi, %rdx
+; LIN-NEXT:    cmovsq %r8, %rdx
 ; LIN-NEXT:    movq %rdx, 8(%rax)
-; LIN-NEXT:    leal -125(%rsi), %r10d
+; LIN-NEXT:    addb $-125, %dil
+; LIN-NEXT:    movl $1, %r11d
+; LIN-NEXT:    movl %edi, %ecx
+; LIN-NEXT:    shlq %cl, %r11
+; LIN-NEXT:    testb $64, %dil
+; LIN-NEXT:    movq %r11, %rsi
+; LIN-NEXT:    cmovneq %r8, %rsi
 ; LIN-NEXT:    movl $1, %edx
 ; LIN-NEXT:    movl %r10d, %ecx
-; LIN-NEXT:    shlq %cl, %rdx
-; LIN-NEXT:    testb $64, %r10b
-; LIN-NEXT:    movq %rdx, %rdi
-; LIN-NEXT:    cmovneq %r9, %rdi
-; LIN-NEXT:    movb $125, %cl
-; LIN-NEXT:    subb %sil, %cl
-; LIN-NEXT:    movl $1, %esi
-; LIN-NEXT:    shrdq %cl, %r9, %rsi
-; LIN-NEXT:    testb $64, %cl
-; LIN-NEXT:    cmovneq %r9, %rsi
-; LIN-NEXT:    cmovsq %rdi, %rsi
-; LIN-NEXT:    cmoveq %r9, %rsi
-; LIN-NEXT:    movq %rsi, 16(%rax)
-; LIN-NEXT:    xorl %esi, %esi
-; LIN-NEXT:    movl %r10d, %ecx
-; LIN-NEXT:    shldq %cl, %r8, %rsi
-; LIN-NEXT:    cmovneq %rdx, %rsi
-; LIN-NEXT:    cmovnsq %r9, %rsi
-; LIN-NEXT:    cmoveq %r9, %rsi
-; LIN-NEXT:    movq %rsi, 24(%rax)
+; LIN-NEXT:    shrdq %cl, %r8, %rdx
+; LIN-NEXT:    cmovneq %r8, %rdx
+; LIN-NEXT:    cmovsq %rsi, %rdx
+; LIN-NEXT:    cmoveq %r8, %rdx
+; LIN-NEXT:    movq %rdx, 16(%rax)
+; LIN-NEXT:    xorl %edx, %edx
+; LIN-NEXT:    movl %edi, %ecx
+; LIN-NEXT:    shldq %cl, %r9, %rdx
+; LIN-NEXT:    cmovneq %r11, %rdx
+; LIN-NEXT:    cmovnsq %r8, %rdx
+; LIN-NEXT:    cmoveq %r8, %rdx
+; LIN-NEXT:    movq %rdx, 24(%rax)
 ; LIN-NEXT:    retq
   %b = add i256 %a, 1
   %m = shl i256 %b, 1
diff --git a/llvm/test/CodeGen/X86/shift-i128.ll b/llvm/test/CodeGen/X86/shift-i128.ll
--- a/llvm/test/CodeGen/X86/shift-i128.ll
+++ b/llvm/test/CodeGen/X86/shift-i128.ll
@@ -13,112 +13,124 @@
 ; i686-NEXT:    pushl %ebx
 ; i686-NEXT:    pushl %edi
 ; i686-NEXT:    pushl %esi
-; i686-NEXT:    subl $20, %esp
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    subl $28, %esp
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    movl %ebp, %esi
+; i686-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; i686-NEXT:    movb %ah, %al
+; i686-NEXT:    subb $64, %al
+; i686-NEXT:    negb %al
+; i686-NEXT:    movl %ebp, %edx
 ; i686-NEXT:    movl %eax, %ecx
-; i686-NEXT:    shrdl %cl, %edi, %esi
-; i686-NEXT:    shrl %cl, %edx
-; i686-NEXT:    shrl %cl, %edi
+; i686-NEXT:    shldl %cl, %edi, %edx
+; i686-NEXT:    shrl %cl, %ebp
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    shrl %cl, %esi
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; i686-NEXT:    testb $32, %al
-; i686-NEXT:    jne .LBB0_1
-; i686-NEXT:  # %bb.2: # %entry
-; i686-NEXT:    movl %edx, (%esp) # 4-byte Spill
-; i686-NEXT:    jmp .LBB0_3
-; i686-NEXT:  .LBB0_1:
-; i686-NEXT:    movl %edi, %esi
-; i686-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
-; i686-NEXT:    xorl %edi, %edi
-; i686-NEXT:  .LBB0_3: # %entry
-; i686-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %eax, %edx
-; i686-NEXT:    subb $64, %dl
-; i686-NEXT:    jb .LBB0_5
-; i686-NEXT:  # %bb.4: # %entry
-; i686-NEXT:    xorl %edi, %edi
-; i686-NEXT:  .LBB0_5: # %entry
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    negb %dl
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    movl $0, %esi
+; i686-NEXT:    movl $0, %ecx
+; i686-NEXT:    jne .LBB0_2
+; i686-NEXT:  # %bb.1: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; i686-NEXT:    movl %ebp, %ecx
+; i686-NEXT:  .LBB0_2: # %entry
+; i686-NEXT:    movl %esi, (%esp) # 4-byte Spill
+; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %edi, %ebx
+; i686-NEXT:    movl %eax, %ecx
+; i686-NEXT:    shll %cl, %ebx
+; i686-NEXT:    testb $32, %al
+; i686-NEXT:    movl %ebx, %ecx
+; i686-NEXT:    jne .LBB0_4
+; i686-NEXT:  # %bb.3: # %entry
 ; i686-NEXT:    movl %edx, %ecx
-; i686-NEXT:    shldl %cl, %ebp, %edi
-; i686-NEXT:    movl %ebp, %esi
-; i686-NEXT:    shll %cl, %esi
+; i686-NEXT:  .LBB0_4: # %entry
+; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %edi, %edx
+; i686-NEXT:    movl %eax, %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    shrdl %cl, %esi, %edx
+; i686-NEXT:    testb $32, %al
+; i686-NEXT:    jne .LBB0_6
+; i686-NEXT:  # %bb.5: # %entry
+; i686-NEXT:    movl %edx, %ebp
+; i686-NEXT:  .LBB0_6: # %entry
+; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    cmpb $64, %ah
+; i686-NEXT:    jb .LBB0_8
+; i686-NEXT:  # %bb.7: # %entry
+; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; i686-NEXT:  .LBB0_8: # %entry
+; i686-NEXT:    movb %ah, %dl
+; i686-NEXT:    addb $-64, %dl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    shrl %cl, %ebp
 ; i686-NEXT:    testb $32, %dl
-; i686-NEXT:    movl %esi, %ebx
-; i686-NEXT:    jne .LBB0_7
-; i686-NEXT:  # %bb.6: # %entry
-; i686-NEXT:    movl %edi, %ebx
-; i686-NEXT:  .LBB0_7: # %entry
-; i686-NEXT:    movb %al, %ah
-; i686-NEXT:    addb $-64, %ah
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movb %ah, %cl
-; i686-NEXT:    shrl %cl, %edi
-; i686-NEXT:    testb $32, %ah
 ; i686-NEXT:    movl $0, %ecx
-; i686-NEXT:    jne .LBB0_9
-; i686-NEXT:  # %bb.8: # %entry
-; i686-NEXT:    movl %edi, %ecx
-; i686-NEXT:  .LBB0_9: # %entry
-; i686-NEXT:    cmpb $64, %al
-; i686-NEXT:    jb .LBB0_10
-; i686-NEXT:  # %bb.11: # %entry
+; i686-NEXT:    jne .LBB0_10
+; i686-NEXT:  # %bb.9: # %entry
+; i686-NEXT:    movl %ebp, %ecx
+; i686-NEXT:  .LBB0_10: # %entry
+; i686-NEXT:    cmpb $64, %ah
+; i686-NEXT:    jb .LBB0_11
+; i686-NEXT:  # %bb.12: # %entry
 ; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; i686-NEXT:    jmp .LBB0_12
-; i686-NEXT:  .LBB0_10:
+; i686-NEXT:    jmp .LBB0_13
+; i686-NEXT:  .LBB0_11:
 ; i686-NEXT:    movl (%esp), %ecx # 4-byte Reload
-; i686-NEXT:    orl %ebx, %ecx
-; i686-NEXT:  .LBB0_12: # %entry
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; i686-NEXT:  .LBB0_13: # %entry
 ; i686-NEXT:    movl %ecx, (%esp) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    testb $32, %dl
-; i686-NEXT:    jne .LBB0_14
-; i686-NEXT:  # %bb.13: # %entry
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB0_14: # %entry
-; i686-NEXT:    movl %ebx, %edx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    testb $32, %al
+; i686-NEXT:    jne .LBB0_15
+; i686-NEXT:  # %bb.14: # %entry
+; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB0_15: # %entry
+; i686-NEXT:    movl %ecx, %ebx
 ; i686-NEXT:    movl %eax, %ecx
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    shrdl %cl, %esi, %edx
+; i686-NEXT:    shrdl %cl, %esi, %ebx
 ; i686-NEXT:    testb $32, %al
-; i686-NEXT:    jne .LBB0_16
-; i686-NEXT:  # %bb.15: # %entry
-; i686-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB0_16: # %entry
-; i686-NEXT:    movb %ah, %cl
+; i686-NEXT:    jne .LBB0_17
+; i686-NEXT:  # %bb.16: # %entry
+; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB0_17: # %entry
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    shrdl %cl, %esi, %edi
+; i686-NEXT:    testb $32, %dl
+; i686-NEXT:    jne .LBB0_19
+; i686-NEXT:  # %bb.18: # %entry
+; i686-NEXT:    movl %edi, %ebp
+; i686-NEXT:  .LBB0_19: # %entry
+; i686-NEXT:    cmpb $64, %ah
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    shrdl %cl, %edx, %ebp
-; i686-NEXT:    testb $32, %ah
-; i686-NEXT:    jne .LBB0_18
-; i686-NEXT:  # %bb.17: # %entry
-; i686-NEXT:    movl %ebp, %edi
-; i686-NEXT:  .LBB0_18: # %entry
-; i686-NEXT:    cmpb $64, %al
-; i686-NEXT:    jae .LBB0_20
-; i686-NEXT:  # %bb.19:
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; i686-NEXT:  .LBB0_20: # %entry
+; i686-NEXT:    jae .LBB0_21
+; i686-NEXT:  # %bb.20:
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; i686-NEXT:  .LBB0_21: # %entry
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    testb %al, %al
-; i686-NEXT:    je .LBB0_22
-; i686-NEXT:  # %bb.21: # %entry
-; i686-NEXT:    movl %edi, %ebx
-; i686-NEXT:    movl (%esp), %esi # 4-byte Reload
-; i686-NEXT:  .LBB0_22: # %entry
+; i686-NEXT:    testb %ah, %ah
+; i686-NEXT:    je .LBB0_23
+; i686-NEXT:  # %bb.22: # %entry
+; i686-NEXT:    movl %ebp, %edx
+; i686-NEXT:    movl (%esp), %eax # 4-byte Reload
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB0_23: # %entry
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; i686-NEXT:    movl %eax, 12(%ecx)
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; i686-NEXT:    movl %eax, 8(%ecx)
-; i686-NEXT:    movl %esi, 4(%ecx)
-; i686-NEXT:    movl %ebx, (%ecx)
-; i686-NEXT:    addl $20, %esp
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:    movl %eax, 4(%ecx)
+; i686-NEXT:    movl %edx, (%ecx)
+; i686-NEXT:    addl $28, %esp
 ; i686-NEXT:    popl %esi
 ; i686-NEXT:    popl %edi
 ; i686-NEXT:    popl %ebx
@@ -150,116 +162,128 @@
 ; i686-NEXT:    pushl %ebx
 ; i686-NEXT:    pushl %edi
 ; i686-NEXT:    pushl %esi
-; i686-NEXT:    subl $24, %esp
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    subl $28, %esp
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    movl %ebp, %esi
+; i686-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; i686-NEXT:    movb %ah, %al
+; i686-NEXT:    subb $64, %al
+; i686-NEXT:    negb %al
+; i686-NEXT:    movl %ebx, %edx
 ; i686-NEXT:    movl %eax, %ecx
-; i686-NEXT:    shrdl %cl, %ebx, %esi
-; i686-NEXT:    shrl %cl, %edx
-; i686-NEXT:    movl %ebx, %edi
-; i686-NEXT:    sarl %cl, %edi
-; i686-NEXT:    sarl $31, %ebx
+; i686-NEXT:    shldl %cl, %edi, %edx
+; i686-NEXT:    movl %ebx, %ebp
+; i686-NEXT:    sarl %cl, %ebp
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    shrl %cl, %esi
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %ebx, %esi
+; i686-NEXT:    sarl $31, %esi
 ; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; i686-NEXT:    testb $32, %al
-; i686-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    jne .LBB1_1
-; i686-NEXT:  # %bb.2: # %entry
-; i686-NEXT:    movl %edx, (%esp) # 4-byte Spill
-; i686-NEXT:    jmp .LBB1_3
-; i686-NEXT:  .LBB1_1:
-; i686-NEXT:    movl %edi, %esi
-; i686-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
-; i686-NEXT:    movl %ebx, %edi
-; i686-NEXT:  .LBB1_3: # %entry
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %eax, %edx
-; i686-NEXT:    subb $64, %dl
-; i686-NEXT:    jb .LBB1_5
-; i686-NEXT:  # %bb.4: # %entry
-; i686-NEXT:    movl %ebx, %edi
-; i686-NEXT:  .LBB1_5: # %entry
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    negb %dl
+; i686-NEXT:    movl $0, %ebx
+; i686-NEXT:    movl %esi, %ecx
+; i686-NEXT:    jne .LBB1_2
+; i686-NEXT:  # %bb.1: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; i686-NEXT:    movl %ebp, %ecx
+; i686-NEXT:  .LBB1_2: # %entry
+; i686-NEXT:    movl %ebx, (%esp) # 4-byte Spill
+; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %edi, %ebx
+; i686-NEXT:    movl %eax, %ecx
+; i686-NEXT:    shll %cl, %ebx
+; i686-NEXT:    testb $32, %al
+; i686-NEXT:    movl %ebx, %ecx
+; i686-NEXT:    jne .LBB1_4
+; i686-NEXT:  # %bb.3: # %entry
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:  .LBB1_4: # %entry
+; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %edi, %edx
+; i686-NEXT:    movl %eax, %ecx
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    shrdl %cl, %edi, %edx
+; i686-NEXT:    testb $32, %al
+; i686-NEXT:    jne .LBB1_6
+; i686-NEXT:  # %bb.5: # %entry
+; i686-NEXT:    movl %edx, %ebp
+; i686-NEXT:  .LBB1_6: # %entry
+; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    cmpb $64, %ah
+; i686-NEXT:    jb .LBB1_8
+; i686-NEXT:  # %bb.7: # %entry
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB1_8: # %entry
+; i686-NEXT:    movb %ah, %dl
+; i686-NEXT:    addb $-64, %dl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; i686-NEXT:    movl %edx, %ecx
-; i686-NEXT:    shldl %cl, %ebp, %edi
-; i686-NEXT:    movl %ebp, %esi
-; i686-NEXT:    shll %cl, %esi
+; i686-NEXT:    sarl %cl, %ebp
 ; i686-NEXT:    testb $32, %dl
 ; i686-NEXT:    movl %esi, %ecx
-; i686-NEXT:    jne .LBB1_7
-; i686-NEXT:  # %bb.6: # %entry
-; i686-NEXT:    movl %edi, %ecx
-; i686-NEXT:  .LBB1_7: # %entry
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movb %al, %ah
-; i686-NEXT:    addb $-64, %ah
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movb %ah, %cl
-; i686-NEXT:    sarl %cl, %edi
-; i686-NEXT:    testb $32, %ah
-; i686-NEXT:    movl %ebx, %ecx
-; i686-NEXT:    jne .LBB1_9
-; i686-NEXT:  # %bb.8: # %entry
-; i686-NEXT:    movl %edi, %ecx
-; i686-NEXT:  .LBB1_9: # %entry
-; i686-NEXT:    cmpb $64, %al
-; i686-NEXT:    jb .LBB1_10
-; i686-NEXT:  # %bb.11: # %entry
-; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    jmp .LBB1_12
-; i686-NEXT:  .LBB1_10:
+; i686-NEXT:    jne .LBB1_10
+; i686-NEXT:  # %bb.9: # %entry
+; i686-NEXT:    movl %ebp, %ecx
+; i686-NEXT:  .LBB1_10: # %entry
+; i686-NEXT:    cmpb $64, %ah
+; i686-NEXT:    jb .LBB1_11
+; i686-NEXT:  # %bb.12: # %entry
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    jmp .LBB1_13
+; i686-NEXT:  .LBB1_11:
 ; i686-NEXT:    movl (%esp), %ecx # 4-byte Reload
 ; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; i686-NEXT:  .LBB1_12: # %entry
+; i686-NEXT:  .LBB1_13: # %entry
 ; i686-NEXT:    movl %ecx, (%esp) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    testb $32, %dl
-; i686-NEXT:    jne .LBB1_14
-; i686-NEXT:  # %bb.13: # %entry
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB1_14: # %entry
-; i686-NEXT:    movl %ebx, %edx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    testb $32, %al
+; i686-NEXT:    jne .LBB1_15
+; i686-NEXT:  # %bb.14: # %entry
+; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB1_15: # %entry
+; i686-NEXT:    movl %ecx, %ebx
 ; i686-NEXT:    movl %eax, %ecx
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    shrdl %cl, %esi, %edx
+; i686-NEXT:    shrdl %cl, %esi, %ebx
 ; i686-NEXT:    testb $32, %al
-; i686-NEXT:    jne .LBB1_16
-; i686-NEXT:  # %bb.15: # %entry
-; i686-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB1_16: # %entry
-; i686-NEXT:    movb %ah, %cl
+; i686-NEXT:    jne .LBB1_17
+; i686-NEXT:  # %bb.16: # %entry
+; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB1_17: # %entry
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    shrdl %cl, %esi, %edi
+; i686-NEXT:    testb $32, %dl
+; i686-NEXT:    jne .LBB1_19
+; i686-NEXT:  # %bb.18: # %entry
+; i686-NEXT:    movl %edi, %ebp
+; i686-NEXT:  .LBB1_19: # %entry
+; i686-NEXT:    cmpb $64, %ah
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    shrdl %cl, %edx, %ebp
-; i686-NEXT:    testb $32, %ah
-; i686-NEXT:    jne .LBB1_18
-; i686-NEXT:  # %bb.17: # %entry
-; i686-NEXT:    movl %ebp, %edi
-; i686-NEXT:  .LBB1_18: # %entry
-; i686-NEXT:    cmpb $64, %al
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    jae .LBB1_20
-; i686-NEXT:  # %bb.19:
-; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; i686-NEXT:    movl %ecx, %edi
-; i686-NEXT:  .LBB1_20: # %entry
+; i686-NEXT:    jae .LBB1_21
+; i686-NEXT:  # %bb.20:
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; i686-NEXT:  .LBB1_21: # %entry
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    testb %al, %al
-; i686-NEXT:    je .LBB1_22
-; i686-NEXT:  # %bb.21: # %entry
-; i686-NEXT:    movl %edi, %ebx
-; i686-NEXT:    movl (%esp), %esi # 4-byte Reload
-; i686-NEXT:  .LBB1_22: # %entry
+; i686-NEXT:    testb %ah, %ah
+; i686-NEXT:    je .LBB1_23
+; i686-NEXT:  # %bb.22: # %entry
+; i686-NEXT:    movl %ebp, %edx
+; i686-NEXT:    movl (%esp), %eax # 4-byte Reload
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB1_23: # %entry
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; i686-NEXT:    movl %eax, 12(%ecx)
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; i686-NEXT:    movl %eax, 8(%ecx)
-; i686-NEXT:    movl %esi, 4(%ecx)
-; i686-NEXT:    movl %ebx, (%ecx)
-; i686-NEXT:    addl $24, %esp
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:    movl %eax, 4(%ecx)
+; i686-NEXT:    movl %edx, (%ecx)
+; i686-NEXT:    addl $28, %esp
 ; i686-NEXT:    popl %esi
 ; i686-NEXT:    popl %edi
 ; i686-NEXT:    popl %ebx
@@ -292,113 +316,125 @@
 ; i686-NEXT:    pushl %ebx
 ; i686-NEXT:    pushl %edi
 ; i686-NEXT:    pushl %esi
-; i686-NEXT:    subl $20, %esp
+; i686-NEXT:    subl $28, %esp
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    movb {{[0-9]+}}(%esp), %ah
+; i686-NEXT:    movb %ah, %al
+; i686-NEXT:    subb $64, %al
+; i686-NEXT:    negb %al
 ; i686-NEXT:    movl %eax, %ecx
-; i686-NEXT:    shll %cl, %ebx
-; i686-NEXT:    movl %ebp, %esi
-; i686-NEXT:    shll %cl, %esi
-; i686-NEXT:    movl %edi, %edx
-; i686-NEXT:    shldl %cl, %ebp, %edx
+; i686-NEXT:    shll %cl, %ebp
+; i686-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    shll %cl, %edx
 ; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; i686-NEXT:    testb $32, %al
-; i686-NEXT:    jne .LBB2_1
-; i686-NEXT:  # %bb.2: # %entry
+; i686-NEXT:    movl $0, %esi
+; i686-NEXT:    movl $0, %ecx
+; i686-NEXT:    jne .LBB2_2
+; i686-NEXT:  # %bb.1: # %entry
+; i686-NEXT:    movl %edx, %esi
+; i686-NEXT:    movl %ebp, %ecx
+; i686-NEXT:  .LBB2_2: # %entry
+; i686-NEXT:    movl %esi, (%esp) # 4-byte Spill
 ; i686-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %ebx, (%esp) # 4-byte Spill
-; i686-NEXT:    jmp .LBB2_3
-; i686-NEXT:  .LBB2_1:
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl $0, (%esp) # 4-byte Folded Spill
-; i686-NEXT:    xorl %esi, %esi
-; i686-NEXT:  .LBB2_3: # %entry
-; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %eax, %edx
-; i686-NEXT:    subb $64, %dl
-; i686-NEXT:    jb .LBB2_5
-; i686-NEXT:  # %bb.4: # %entry
-; i686-NEXT:    xorl %esi, %esi
-; i686-NEXT:  .LBB2_5: # %entry
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    negb %dl
-; i686-NEXT:    movl %edi, %esi
+; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %edi, %ebx
+; i686-NEXT:    movl %eax, %ecx
+; i686-NEXT:    shrl %cl, %ebx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; i686-NEXT:    shrdl %cl, %edi, %edx
+; i686-NEXT:    testb $32, %al
+; i686-NEXT:    movl %ebx, %ecx
+; i686-NEXT:    jne .LBB2_4
+; i686-NEXT:  # %bb.3: # %entry
 ; i686-NEXT:    movl %edx, %ecx
-; i686-NEXT:    shrl %cl, %esi
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    shrdl %cl, %edi, %ebx
+; i686-NEXT:  .LBB2_4: # %entry
+; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %edi, %edx
+; i686-NEXT:    movl %eax, %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    shldl %cl, %esi, %edx
+; i686-NEXT:    testb $32, %al
+; i686-NEXT:    jne .LBB2_6
+; i686-NEXT:  # %bb.5: # %entry
+; i686-NEXT:    movl %edx, %ebp
+; i686-NEXT:  .LBB2_6: # %entry
+; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    cmpb $64, %ah
+; i686-NEXT:    jb .LBB2_8
+; i686-NEXT:  # %bb.7: # %entry
+; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; i686-NEXT:  .LBB2_8: # %entry
+; i686-NEXT:    movb %ah, %dl
+; i686-NEXT:    addb $-64, %dl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    shll %cl, %ebp
 ; i686-NEXT:    testb $32, %dl
-; i686-NEXT:    movl %esi, %ebp
-; i686-NEXT:    jne .LBB2_7
-; i686-NEXT:  # %bb.6: # %entry
-; i686-NEXT:    movl %ebx, %ebp
-; i686-NEXT:  .LBB2_7: # %entry
-; i686-NEXT:    movb %al, %ah
-; i686-NEXT:    addb $-64, %ah
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    movb %ah, %cl
-; i686-NEXT:    shll %cl, %ebx
-; i686-NEXT:    testb $32, %ah
 ; i686-NEXT:    movl $0, %ecx
-; i686-NEXT:    jne .LBB2_9
-; i686-NEXT:  # %bb.8: # %entry
-; i686-NEXT:    movl %ebx, %ecx
-; i686-NEXT:  .LBB2_9: # %entry
-; i686-NEXT:    cmpb $64, %al
-; i686-NEXT:    jb .LBB2_10
-; i686-NEXT:  # %bb.11: # %entry
+; i686-NEXT:    jne .LBB2_10
+; i686-NEXT:  # %bb.9: # %entry
+; i686-NEXT:    movl %ebp, %ecx
+; i686-NEXT:  .LBB2_10: # %entry
+; i686-NEXT:    cmpb $64, %ah
+; i686-NEXT:    jb .LBB2_11
+; i686-NEXT:  # %bb.12: # %entry
 ; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; i686-NEXT:    jmp .LBB2_12
-; i686-NEXT:  .LBB2_10:
+; i686-NEXT:    jmp .LBB2_13
+; i686-NEXT:  .LBB2_11:
 ; i686-NEXT:    movl (%esp), %ecx # 4-byte Reload
-; i686-NEXT:    orl %ebp, %ecx
-; i686-NEXT:  .LBB2_12: # %entry
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; i686-NEXT:  .LBB2_13: # %entry
 ; i686-NEXT:    movl %ecx, (%esp) # 4-byte Spill
-; i686-NEXT:    testb $32, %dl
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    jne .LBB2_14
-; i686-NEXT:  # %bb.13: # %entry
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB2_14: # %entry
-; i686-NEXT:    movl %edx, %esi
-; i686-NEXT:    movl %eax, %ecx
-; i686-NEXT:    shldl %cl, %ebp, %esi
 ; i686-NEXT:    testb $32, %al
-; i686-NEXT:    jne .LBB2_16
-; i686-NEXT:  # %bb.15: # %entry
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB2_16: # %entry
-; i686-NEXT:    movb %ah, %cl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    jne .LBB2_15
+; i686-NEXT:  # %bb.14: # %entry
+; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB2_15: # %entry
+; i686-NEXT:    movl %ecx, %ebx
+; i686-NEXT:    movl %eax, %ecx
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    shldl %cl, %esi, %edi
-; i686-NEXT:    testb $32, %ah
-; i686-NEXT:    jne .LBB2_18
-; i686-NEXT:  # %bb.17: # %entry
-; i686-NEXT:    movl %edi, %ebx
-; i686-NEXT:  .LBB2_18: # %entry
-; i686-NEXT:    cmpb $64, %al
+; i686-NEXT:    shldl %cl, %esi, %ebx
+; i686-NEXT:    testb $32, %al
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; i686-NEXT:    jae .LBB2_20
-; i686-NEXT:  # %bb.19:
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
-; i686-NEXT:  .LBB2_20: # %entry
+; i686-NEXT:    jne .LBB2_17
+; i686-NEXT:  # %bb.16: # %entry
+; i686-NEXT:    movl %ebx, %esi
+; i686-NEXT:  .LBB2_17: # %entry
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    shldl %cl, %ebx, %edi
+; i686-NEXT:    testb $32, %dl
+; i686-NEXT:    jne .LBB2_19
+; i686-NEXT:  # %bb.18: # %entry
+; i686-NEXT:    movl %edi, %ebp
+; i686-NEXT:  .LBB2_19: # %entry
+; i686-NEXT:    cmpb $64, %ah
+; i686-NEXT:    jae .LBB2_21
+; i686-NEXT:  # %bb.20:
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; i686-NEXT:    movl %esi, %ebp
+; i686-NEXT:  .LBB2_21: # %entry
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    testb %al, %al
-; i686-NEXT:    je .LBB2_22
-; i686-NEXT:  # %bb.21: # %entry
-; i686-NEXT:    movl %ebx, %edx
-; i686-NEXT:    movl (%esp), %ebp # 4-byte Reload
-; i686-NEXT:  .LBB2_22: # %entry
+; i686-NEXT:    testb %ah, %ah
+; i686-NEXT:    je .LBB2_23
+; i686-NEXT:  # %bb.22: # %entry
+; i686-NEXT:    movl %ebp, %edx
+; i686-NEXT:    movl (%esp), %eax # 4-byte Reload
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB2_23: # %entry
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
 ; i686-NEXT:    movl %eax, 4(%ecx)
-; i686-NEXT:    movl %esi, (%ecx)
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:    movl %eax, (%ecx)
 ; i686-NEXT:    movl %edx, 12(%ecx)
-; i686-NEXT:    movl %ebp, 8(%ecx)
-; i686-NEXT:    addl $20, %esp
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:    movl %eax, 8(%ecx)
+; i686-NEXT:    addl $28, %esp
 ; i686-NEXT:    popl %esi
 ; i686-NEXT:    popl %edi
 ; i686-NEXT:    popl %ebx
@@ -464,259 +500,281 @@
 ; i686-NEXT:    pushl %ebx
 ; i686-NEXT:    pushl %edi
 ; i686-NEXT:    pushl %esi
-; i686-NEXT:    subl $68, %esp
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; i686-NEXT:    subl $72, %esp
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; i686-NEXT:    movl %ebx, %edi
-; i686-NEXT:    movl %eax, %ecx
-; i686-NEXT:    shrl %cl, %edi
-; i686-NEXT:    movl %esi, %ebp
+; i686-NEXT:    movb $64, %dl
+; i686-NEXT:    movb $64, %ch
+; i686-NEXT:    subb %al, %ch
+; i686-NEXT:    movl %esi, %eax
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    shll %cl, %eax
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    movl $0, %esi
+; i686-NEXT:    jne .LBB6_2
+; i686-NEXT:  # %bb.1: # %entry
+; i686-NEXT:    movl %eax, %esi
+; i686-NEXT:  .LBB6_2: # %entry
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %edi, %ebp
+; i686-NEXT:    movb %ch, %cl
 ; i686-NEXT:    shrl %cl, %ebp
-; i686-NEXT:    shrdl %cl, %esi, %edx
-; i686-NEXT:    testb $32, %al
-; i686-NEXT:    jne .LBB6_1
-; i686-NEXT:  # %bb.2: # %entry
-; i686-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    jmp .LBB6_3
-; i686-NEXT:  .LBB6_1:
-; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; i686-NEXT:  .LBB6_3: # %entry
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    movl %eax, %ecx
-; i686-NEXT:    shrdl %cl, %ebx, %esi
-; i686-NEXT:    testb $32, %al
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    jne .LBB6_5
-; i686-NEXT:  # %bb.4: # %entry
-; i686-NEXT:    movl %esi, %edi
-; i686-NEXT:  .LBB6_5: # %entry
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    movl $0, %esi
+; i686-NEXT:    jne .LBB6_4
+; i686-NEXT:  # %bb.3: # %entry
+; i686-NEXT:    movl %ebp, %esi
+; i686-NEXT:  .LBB6_4: # %entry
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    shrl %cl, %eax
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl $0, %eax
+; i686-NEXT:    jne .LBB6_6
+; i686-NEXT:  # %bb.5: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:  .LBB6_6: # %entry
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    subb %al, %dl
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    movb %dl, %cl
+; i686-NEXT:    shrl %cl, %esi
+; i686-NEXT:    testb $32, %dl
+; i686-NEXT:    movl $0, %eax
+; i686-NEXT:    jne .LBB6_8
+; i686-NEXT:  # %bb.7: # %entry
+; i686-NEXT:    movl %esi, %eax
+; i686-NEXT:  .LBB6_8: # %entry
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %ebx, %esi
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    shrdl %cl, %eax, %esi
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    subl $64, %eax
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    movl %edx, %ecx
-; i686-NEXT:    shrl %cl, %ebx
-; i686-NEXT:    shrl %cl, %ebp
-; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %edx, %ecx
-; i686-NEXT:    subl $64, %ecx
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    sbbl $0, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    sbbl $0, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    sbbl $0, %ecx
+; i686-NEXT:    sbbl $0, %ebx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    sbbl $0, %ebx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    sbbl $0, %ebx
 ; i686-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; i686-NEXT:    testb $32, %dl
-; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; i686-NEXT:    movl $0, %ecx
-; i686-NEXT:    jne .LBB6_7
-; i686-NEXT:  # %bb.6: # %entry
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %ebx, %ecx
-; i686-NEXT:  .LBB6_7: # %entry
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %edx, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    shrdl %cl, %ebp, %esi
-; i686-NEXT:    testb $32, %dl
-; i686-NEXT:    jne .LBB6_9
-; i686-NEXT:  # %bb.8: # %entry
-; i686-NEXT:    movl %esi, %ebx
-; i686-NEXT:  .LBB6_9: # %entry
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    jne .LBB6_10
+; i686-NEXT:  # %bb.9: # %entry
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB6_10: # %entry
 ; i686-NEXT:    movl %edi, %esi
-; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    shrl %cl, %ebp
-; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    movl $0, %ecx
-; i686-NEXT:    jne .LBB6_11
-; i686-NEXT:  # %bb.10: # %entry
-; i686-NEXT:    movl %ebp, %ecx
-; i686-NEXT:  .LBB6_11: # %entry
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movb $64, %cl
-; i686-NEXT:    subb %dl, %cl
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    movb %ch, %cl
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    shldl %cl, %ebx, %edi
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %ebx, %edi
-; i686-NEXT:    shll %cl, %edi
-; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    movb $64, %bl
+; i686-NEXT:    shldl %cl, %ebx, %esi
+; i686-NEXT:    testb $32, %ch
 ; i686-NEXT:    jne .LBB6_12
+; i686-NEXT:  # %bb.11: # %entry
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB6_12: # %entry
+; i686-NEXT:    movl %ebx, %esi
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    shrdl %cl, %edi, %esi
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    jne .LBB6_14
 ; i686-NEXT:  # %bb.13: # %entry
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    jmp .LBB6_14
-; i686-NEXT:  .LBB6_12:
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; i686-NEXT:    movl %esi, %ebp
 ; i686-NEXT:  .LBB6_14: # %entry
-; i686-NEXT:    movl %esi, %edi
 ; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; i686-NEXT:    movl %edx, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    shrdl %cl, %ebp, %esi
+; i686-NEXT:    shrl %cl, %esi
 ; i686-NEXT:    testb $32, %dl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; i686-NEXT:    movl $0, %ecx
 ; i686-NEXT:    jne .LBB6_16
 ; i686-NEXT:  # %bb.15: # %entry
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %esi, %ecx
 ; i686-NEXT:  .LBB6_16: # %entry
+; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    subb %al, %bl
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    movl %ebx, %ecx
-; i686-NEXT:    shll %cl, %ebp
-; i686-NEXT:    testb $32, %bl
+; i686-NEXT:    movl %ebp, %ebx
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    shll %cl, %ebx
+; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    testb $32, %dl
 ; i686-NEXT:    movl $0, %ecx
 ; i686-NEXT:    jne .LBB6_18
 ; i686-NEXT:  # %bb.17: # %entry
-; i686-NEXT:    movl %ebp, %ecx
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; i686-NEXT:  .LBB6_18: # %entry
 ; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %eax, %ecx
-; i686-NEXT:    subl $64, %ecx
-; i686-NEXT:    sbbl $0, %esi
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    sbbl $0, %esi
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    sbbl $0, %esi
-; i686-NEXT:    setae %bh
-; i686-NEXT:    jb .LBB6_20
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    shrdl %cl, %ebx, %esi
+; i686-NEXT:    testb $32, %dl
+; i686-NEXT:    jne .LBB6_20
 ; i686-NEXT:  # %bb.19: # %entry
-; i686-NEXT:    xorl %edi, %edi
-; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:  .LBB6_20: # %entry
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    shrdl %cl, %esi, %edi
-; i686-NEXT:    shrl %cl, %esi
-; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    shldl %cl, %ebp, %esi
+; i686-NEXT:    testb $32, %dl
 ; i686-NEXT:    jne .LBB6_22
 ; i686-NEXT:  # %bb.21: # %entry
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:  .LBB6_22: # %entry
-; i686-NEXT:    testb %bh, %bh
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    movl %eax, %ecx
+; i686-NEXT:    shrl %cl, %esi
+; i686-NEXT:    testb $32, %al
+; i686-NEXT:    movl $0, %ecx
 ; i686-NEXT:    jne .LBB6_24
-; i686-NEXT:  # %bb.23:
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Folded Reload
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  # %bb.23: # %entry
+; i686-NEXT:    movl %esi, %ecx
 ; i686-NEXT:  .LBB6_24: # %entry
-; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    movl $0, %ecx
+; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %ebp, %ebx
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    shrdl %cl, %esi, %ebx
+; i686-NEXT:    testb $32, %dl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; i686-NEXT:    jne .LBB6_26
 ; i686-NEXT:  # %bb.25: # %entry
-; i686-NEXT:    movl %esi, %ecx
+; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:  .LBB6_26: # %entry
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; i686-NEXT:    movl %ebx, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    shldl %cl, %edi, %esi
-; i686-NEXT:    testb $32, %bl
-; i686-NEXT:    jne .LBB6_28
+; i686-NEXT:    subl $64, %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; i686-NEXT:    sbbl $0, %edx
+; i686-NEXT:    movl %esi, %edx
+; i686-NEXT:    sbbl $0, %edx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; i686-NEXT:    sbbl $0, %edx
+; i686-NEXT:    setae %dl
+; i686-NEXT:    jb .LBB6_28
 ; i686-NEXT:  # %bb.27: # %entry
-; i686-NEXT:    movl %esi, %ebp
+; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
 ; i686-NEXT:  .LBB6_28: # %entry
-; i686-NEXT:    testb %bh, %bh
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    shrdl %cl, %edi, %esi
+; i686-NEXT:    shrl %cl, %edi
+; i686-NEXT:    testb $32, %cl
+; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    jne .LBB6_30
-; i686-NEXT:  # %bb.29:
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    orl %ebp, %ecx
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  # %bb.29: # %entry
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:  .LBB6_30: # %entry
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
+; i686-NEXT:    testb %dl, %dl
 ; i686-NEXT:    jne .LBB6_32
-; i686-NEXT:  # %bb.31: # %entry
-; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; i686-NEXT:  # %bb.31:
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:  .LBB6_32: # %entry
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    shrdl %cl, %ebp, %edi
-; i686-NEXT:    movl %edi, %ebp
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
 ; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; i686-NEXT:    je .LBB6_33
-; i686-NEXT:  # %bb.34: # %entry
-; i686-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; i686-NEXT:    jne .LBB6_35
-; i686-NEXT:  .LBB6_36: # %entry
-; i686-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; i686-NEXT:    je .LBB6_38
-; i686-NEXT:  .LBB6_37:
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; i686-NEXT:    movl $0, %ecx
+; i686-NEXT:    jne .LBB6_34
+; i686-NEXT:  # %bb.33: # %entry
+; i686-NEXT:    movl %edi, %ecx
+; i686-NEXT:  .LBB6_34: # %entry
 ; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    testb %dl, %dl
+; i686-NEXT:    movb {{[-0-9]+}}(%e{{[sb]}}p), %ch # 1-byte Reload
+; i686-NEXT:    je .LBB6_35
+; i686-NEXT:  # %bb.36: # %entry
+; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; i686-NEXT:    testb %ch, %ch
+; i686-NEXT:    je .LBB6_38
+; i686-NEXT:    jmp .LBB6_39
+; i686-NEXT:  .LBB6_35:
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Folded Reload
+; i686-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    testb %ch, %ch
+; i686-NEXT:    jne .LBB6_39
 ; i686-NEXT:  .LBB6_38: # %entry
+; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; i686-NEXT:  .LBB6_39: # %entry
+; i686-NEXT:    movb %al, %cl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; i686-NEXT:    shrdl %cl, %edx, %ebp
+; i686-NEXT:    testb $32, %al
+; i686-NEXT:    jne .LBB6_41
+; i686-NEXT:  # %bb.40: # %entry
+; i686-NEXT:    movl %ebp, %esi
+; i686-NEXT:  .LBB6_41: # %entry
+; i686-NEXT:    testb %ch, %ch
+; i686-NEXT:    je .LBB6_43
+; i686-NEXT:  # %bb.42:
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; i686-NEXT:  .LBB6_43: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
+; i686-NEXT:    testb %ch, %ch
+; i686-NEXT:    jne .LBB6_44
+; i686-NEXT:  # %bb.45: # %entry
+; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; i686-NEXT:    jmp .LBB6_46
+; i686-NEXT:  .LBB6_44:
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB6_46: # %entry
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; i686-NEXT:    orl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    orl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    orl %ecx, %edx
-; i686-NEXT:    je .LBB6_40
-; i686-NEXT:  # %bb.39: # %entry
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; i686-NEXT:  .LBB6_40: # %entry
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    orl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; i686-NEXT:    orl %edx, %eax
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; i686-NEXT:    je .LBB6_42
-; i686-NEXT:  # %bb.41: # %entry
+; i686-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    orl %ecx, %eax
+; i686-NEXT:    je .LBB6_48
+; i686-NEXT:  # %bb.47: # %entry
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB6_48: # %entry
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    orl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    orl %ecx, %ebx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    je .LBB6_50
+; i686-NEXT:  # %bb.49: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; i686-NEXT:  .LBB6_50: # %entry
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; i686-NEXT:  .LBB6_42: # %entry
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; i686-NEXT:    movl %edx, 28(%ecx)
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; i686-NEXT:    movl %edx, 24(%ecx)
+; i686-NEXT:    movl %esi, 28(%eax)
+; i686-NEXT:    movl %edx, 24(%eax)
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; i686-NEXT:    movl %edx, 12(%ecx)
+; i686-NEXT:    movl %edx, 12(%eax)
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; i686-NEXT:    movl %edx, 8(%ecx)
-; i686-NEXT:    movl %esi, 20(%ecx)
-; i686-NEXT:    movl %eax, 16(%ecx)
-; i686-NEXT:    movl %ebx, 4(%ecx)
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; i686-NEXT:    movl %eax, (%ecx)
-; i686-NEXT:    addl $68, %esp
+; i686-NEXT:    movl %edx, 8(%eax)
+; i686-NEXT:    movl %ecx, 20(%eax)
+; i686-NEXT:    movl %edi, 16(%eax)
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; i686-NEXT:    movl %ecx, 4(%eax)
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; i686-NEXT:    movl %ecx, (%eax)
+; i686-NEXT:    addl $72, %esp
 ; i686-NEXT:    popl %esi
 ; i686-NEXT:    popl %edi
 ; i686-NEXT:    popl %ebx
 ; i686-NEXT:    popl %ebp
 ; i686-NEXT:    retl
-; i686-NEXT:  .LBB6_33: # %entry
-; i686-NEXT:    movl %ebp, %edi
-; i686-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; i686-NEXT:    je .LBB6_36
-; i686-NEXT:  .LBB6_35:
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
-; i686-NEXT:    movl %ecx, %edi
-; i686-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; i686-NEXT:    jne .LBB6_37
-; i686-NEXT:    jmp .LBB6_38
 ;
 ; x86_64-LABEL: test_lshr_v2i128:
 ; x86_64:       # %bb.0: # %entry
@@ -756,261 +814,289 @@
 ; i686-NEXT:    pushl %edi
 ; i686-NEXT:    pushl %esi
 ; i686-NEXT:    subl $80, %esp
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; i686-NEXT:    movl %ebp, %ebx
-; i686-NEXT:    movl %eax, %ecx
-; i686-NEXT:    sarl %cl, %ebx
-; i686-NEXT:    movl %esi, %edi
-; i686-NEXT:    shrl %cl, %edi
-; i686-NEXT:    shrdl %cl, %esi, %edx
-; i686-NEXT:    sarl $31, %ebp
+; i686-NEXT:    movb $64, %dl
+; i686-NEXT:    movb $64, %ch
+; i686-NEXT:    subb %al, %ch
+; i686-NEXT:    movl %edi, %eax
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    shll %cl, %eax
 ; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; i686-NEXT:    testb $32, %al
-; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    jne .LBB7_1
-; i686-NEXT:  # %bb.2: # %entry
-; i686-NEXT:    movl %edx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    movl $0, %edi
+; i686-NEXT:    jne .LBB7_2
+; i686-NEXT:  # %bb.1: # %entry
+; i686-NEXT:    movl %eax, %edi
+; i686-NEXT:  .LBB7_2: # %entry
+; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %esi, %ebp
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    sarl %cl, %ebp
+; i686-NEXT:    movl %esi, %edi
+; i686-NEXT:    sarl $31, %edi
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    jne .LBB7_4
+; i686-NEXT:  # %bb.3: # %entry
+; i686-NEXT:    movl %ebp, %edi
+; i686-NEXT:  .LBB7_4: # %entry
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    shrl %cl, %eax
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
 ; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    jmp .LBB7_3
-; i686-NEXT:  .LBB7_1:
+; i686-NEXT:    movl $0, %eax
+; i686-NEXT:    jne .LBB7_6
+; i686-NEXT:  # %bb.5: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:  .LBB7_6: # %entry
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    subb %al, %dl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    movb %dl, %cl
+; i686-NEXT:    shrl %cl, %edi
+; i686-NEXT:    testb $32, %dl
 ; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; i686-NEXT:    jne .LBB7_8
+; i686-NEXT:  # %bb.7: # %entry
+; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB7_8: # %entry
+; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %ebx, %edi
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    shrdl %cl, %eax, %edi
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    subl $64, %eax
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    sbbl $0, %ebx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    sbbl $0, %ebx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    sbbl $0, %ebx
+; i686-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    jne .LBB7_10
+; i686-NEXT:  # %bb.9: # %entry
+; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB7_10: # %entry
+; i686-NEXT:    movl %esi, %edi
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    shldl %cl, %ebx, %edi
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    jne .LBB7_12
+; i686-NEXT:  # %bb.11: # %entry
+; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB7_12: # %entry
+; i686-NEXT:    movl %ebx, %edi
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    shrdl %cl, %esi, %edi
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    jne .LBB7_14
+; i686-NEXT:  # %bb.13: # %entry
+; i686-NEXT:    movl %edi, %ebp
+; i686-NEXT:  .LBB7_14: # %entry
 ; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB7_3: # %entry
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movl %eax, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    shrdl %cl, %edx, %edi
-; i686-NEXT:    testb $32, %al
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    jne .LBB7_5
-; i686-NEXT:  # %bb.4: # %entry
 ; i686-NEXT:    movl %edi, %ebx
-; i686-NEXT:  .LBB7_5: # %entry
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    movl %ebp, %edi
-; i686-NEXT:    movl %edx, %ecx
-; i686-NEXT:    sarl %cl, %edi
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    shrl %cl, %esi
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    sarl $31, %ebp
 ; i686-NEXT:    movl %edx, %ecx
-; i686-NEXT:    subl $64, %ecx
+; i686-NEXT:    sarl %cl, %ebx
+; i686-NEXT:    movl %edi, %ecx
+; i686-NEXT:    sarl $31, %ecx
+; i686-NEXT:    testb $32, %dl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    sbbl $0, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    sbbl $0, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    sbbl $0, %ecx
-; i686-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; i686-NEXT:    jne .LBB7_16
+; i686-NEXT:  # %bb.15: # %entry
+; i686-NEXT:    movl %ebx, %ecx
+; i686-NEXT:  .LBB7_16: # %entry
+; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    movl %edi, %ebp
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    shll %cl, %ebp
 ; i686-NEXT:    testb $32, %dl
-; i686-NEXT:    movl $0, %esi
+; i686-NEXT:    jne .LBB7_18
+; i686-NEXT:  # %bb.17: # %entry
+; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB7_18: # %entry
 ; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %ebp, %ecx
-; i686-NEXT:    jne .LBB7_7
-; i686-NEXT:  # %bb.6: # %entry
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; i686-NEXT:    movl %edi, %ecx
-; i686-NEXT:  .LBB7_7: # %entry
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; i686-NEXT:    movl %edx, %ecx
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    shrdl %cl, %ebp, %esi
+; i686-NEXT:    shrdl %cl, %ebp, %ebx
 ; i686-NEXT:    testb $32, %dl
-; i686-NEXT:    jne .LBB7_9
-; i686-NEXT:  # %bb.8: # %entry
-; i686-NEXT:    movl %esi, %edi
-; i686-NEXT:  .LBB7_9: # %entry
+; i686-NEXT:    jne .LBB7_20
+; i686-NEXT:  # %bb.19: # %entry
 ; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    sarl %cl, %esi
-; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    jne .LBB7_11
-; i686-NEXT:  # %bb.10: # %entry
-; i686-NEXT:    movl %esi, %ecx
-; i686-NEXT:  .LBB7_11: # %entry
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    movb $64, %cl
-; i686-NEXT:    subb %dl, %cl
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; i686-NEXT:  .LBB7_20: # %entry
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    shldl %cl, %ebx, %ebp
-; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %ebx, %ebp
-; i686-NEXT:    shll %cl, %ebp
-; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    movb $64, %bl
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    je .LBB7_13
-; i686-NEXT:  # %bb.12:
-; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    xorl %ebp, %ebp
-; i686-NEXT:  .LBB7_13: # %entry
-; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    movl %edx, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    shrdl %cl, %edi, %esi
+; i686-NEXT:    shldl %cl, %edi, %ebx
 ; i686-NEXT:    testb $32, %dl
-; i686-NEXT:    jne .LBB7_15
-; i686-NEXT:  # %bb.14: # %entry
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB7_15: # %entry
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    subb %al, %bl
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    movl %ebx, %ecx
-; i686-NEXT:    shll %cl, %ebp
-; i686-NEXT:    testb $32, %bl
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    jne .LBB7_17
-; i686-NEXT:  # %bb.16: # %entry
-; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB7_17: # %entry
-; i686-NEXT:    movl %eax, %ecx
-; i686-NEXT:    subl $64, %ecx
-; i686-NEXT:    sbbl $0, %esi
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    sbbl $0, %esi
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    sbbl $0, %esi
-; i686-NEXT:    setae %bh
-; i686-NEXT:    jb .LBB7_19
-; i686-NEXT:  # %bb.18: # %entry
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB7_19: # %entry
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    shrdl %cl, %edi, %esi
-; i686-NEXT:    sarl %cl, %edi
-; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    je .LBB7_20
+; i686-NEXT:    jne .LBB7_22
 ; i686-NEXT:  # %bb.21: # %entry
-; i686-NEXT:    testb %bh, %bh
-; i686-NEXT:    je .LBB7_22
-; i686-NEXT:  .LBB7_23: # %entry
-; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    jne .LBB7_25
+; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB7_22: # %entry
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    movl %eax, %ecx
+; i686-NEXT:    sarl %cl, %ebx
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    testb $32, %al
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; i686-NEXT:    jne .LBB7_24
+; i686-NEXT:  # %bb.23: # %entry
+; i686-NEXT:    movl %ebx, %eax
 ; i686-NEXT:  .LBB7_24: # %entry
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB7_25: # %entry
-; i686-NEXT:    movl %ebx, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    shldl %cl, %esi, %edi
-; i686-NEXT:    testb $32, %bl
-; i686-NEXT:    jne .LBB7_27
-; i686-NEXT:  # %bb.26: # %entry
-; i686-NEXT:    movl %edi, %ebp
-; i686-NEXT:  .LBB7_27: # %entry
-; i686-NEXT:    testb %bh, %bh
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; i686-NEXT:    jne .LBB7_29
-; i686-NEXT:  # %bb.28:
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
-; i686-NEXT:    orl %ebp, %ebx
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB7_29: # %entry
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; i686-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; i686-NEXT:    jne .LBB7_31
-; i686-NEXT:  # %bb.30: # %entry
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB7_31: # %entry
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; i686-NEXT:    movl %edi, %ebx
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    shrdl %cl, %eax, %ebx
+; i686-NEXT:    testb $32, %dl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    jne .LBB7_26
+; i686-NEXT:  # %bb.25: # %entry
+; i686-NEXT:    movl %ebx, %ebp
+; i686-NEXT:  .LBB7_26: # %entry
+; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    shrdl %cl, %ebp, %ebx
+; i686-NEXT:    movl %ebx, %ecx
+; i686-NEXT:    subl $64, %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; i686-NEXT:    sbbl $0, %edx
+; i686-NEXT:    movl %eax, %edx
+; i686-NEXT:    sbbl $0, %edx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; i686-NEXT:    sbbl $0, %edx
+; i686-NEXT:    setae %dl
+; i686-NEXT:    jb .LBB7_28
+; i686-NEXT:  # %bb.27: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB7_28: # %entry
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; i686-NEXT:    shrdl %cl, %esi, %ebp
+; i686-NEXT:    sarl %cl, %esi
 ; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    jne .LBB7_33
-; i686-NEXT:  # %bb.32: # %entry
-; i686-NEXT:    movl %ebx, %esi
-; i686-NEXT:  .LBB7_33: # %entry
-; i686-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:    jne .LBB7_30
+; i686-NEXT:  # %bb.29: # %entry
+; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB7_30: # %entry
+; i686-NEXT:    testb %dl, %dl
+; i686-NEXT:    jne .LBB7_32
+; i686-NEXT:  # %bb.31:
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
+; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB7_32: # %entry
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; i686-NEXT:    testb $32, %cl
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; i686-NEXT:    jne .LBB7_34
+; i686-NEXT:  # %bb.33: # %entry
+; i686-NEXT:    movl %esi, %ecx
+; i686-NEXT:  .LBB7_34: # %entry
+; i686-NEXT:    testb %dl, %dl
 ; i686-NEXT:    je .LBB7_35
-; i686-NEXT:  # %bb.34:
+; i686-NEXT:  # %bb.36: # %entry
+; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    orl %ebx, %ecx
-; i686-NEXT:    movl %ecx, %esi
-; i686-NEXT:  .LBB7_35: # %entry
-; i686-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; i686-NEXT:    je .LBB7_37
-; i686-NEXT:  # %bb.36:
+; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    jmp .LBB7_37
+; i686-NEXT:  .LBB7_35:
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
 ; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:  .LBB7_37: # %entry
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    orl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    orl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    orl %ecx, %edx
-; i686-NEXT:    je .LBB7_39
+; i686-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 1-byte Folded Reload
+; i686-NEXT:    testb %dl, %dl
+; i686-NEXT:    jne .LBB7_39
 ; i686-NEXT:  # %bb.38: # %entry
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:  .LBB7_39: # %entry
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    movl %eax, %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    shrdl %cl, %esi, %edi
+; i686-NEXT:    testb $32, %al
+; i686-NEXT:    jne .LBB7_41
+; i686-NEXT:  # %bb.40: # %entry
+; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB7_41: # %entry
+; i686-NEXT:    testb %dl, %dl
+; i686-NEXT:    je .LBB7_43
+; i686-NEXT:  # %bb.42:
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB7_43: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; i686-NEXT:    testb %dl, %dl
+; i686-NEXT:    jne .LBB7_44
+; i686-NEXT:  # %bb.45: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    jmp .LBB7_46
+; i686-NEXT:  .LBB7_44:
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB7_46: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; i686-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; i686-NEXT:    orl {{[0-9]+}}(%esp), %eax
 ; i686-NEXT:    orl %edx, %eax
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    je .LBB7_41
-; i686-NEXT:  # %bb.40: # %entry
+; i686-NEXT:    je .LBB7_48
+; i686-NEXT:  # %bb.47: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; i686-NEXT:  .LBB7_41: # %entry
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; i686-NEXT:    movl %edx, 28(%ecx)
-; i686-NEXT:    movl %edi, 24(%ecx)
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; i686-NEXT:    movl %edx, 12(%ecx)
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB7_48: # %entry
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; i686-NEXT:    orl {{[0-9]+}}(%esp), %edx
+; i686-NEXT:    orl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    orl %edx, %ebx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    je .LBB7_50
+; i686-NEXT:  # %bb.49: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; i686-NEXT:    movl %edx, 8(%ecx)
-; i686-NEXT:    movl %esi, 20(%ecx)
-; i686-NEXT:    movl %eax, 16(%ecx)
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; i686-NEXT:    movl %eax, 4(%ecx)
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; i686-NEXT:    movl %eax, (%ecx)
+; i686-NEXT:  .LBB7_50: # %entry
+; i686-NEXT:    movl %esi, 28(%eax)
+; i686-NEXT:    movl %ecx, 24(%eax)
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; i686-NEXT:    movl %ecx, 12(%eax)
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; i686-NEXT:    movl %ecx, 8(%eax)
+; i686-NEXT:    movl %edx, 20(%eax)
+; i686-NEXT:    movl %edi, 16(%eax)
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; i686-NEXT:    movl %ecx, 4(%eax)
+; i686-NEXT:    movl %ebp, (%eax)
 ; i686-NEXT:    addl $80, %esp
 ; i686-NEXT:    popl %esi
 ; i686-NEXT:    popl %edi
 ; i686-NEXT:    popl %ebx
 ; i686-NEXT:    popl %ebp
 ; i686-NEXT:    retl
-; i686-NEXT:  .LBB7_20: # %entry
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    testb %bh, %bh
-; i686-NEXT:    jne .LBB7_23
-; i686-NEXT:  .LBB7_22:
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    je .LBB7_24
-; i686-NEXT:    jmp .LBB7_25
 ;
 ; x86_64-LABEL: test_ashr_v2i128:
 ; x86_64:       # %bb.0: # %entry
@@ -1052,260 +1138,262 @@
 ; i686-NEXT:    pushl %ebx
 ; i686-NEXT:    pushl %edi
 ; i686-NEXT:    pushl %esi
-; i686-NEXT:    subl $72, %esp
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    subl $68, %esp
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    movl %ebx, %ecx
-; i686-NEXT:    shll %cl, %ebp
-; i686-NEXT:    shll %cl, %esi
-; i686-NEXT:    movl %edx, %eax
-; i686-NEXT:    subl $64, %eax
-; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; i686-NEXT:    sbbl $0, %eax
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; i686-NEXT:    sbbl $0, %eax
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; i686-NEXT:    sbbl $0, %eax
-; i686-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; i686-NEXT:    testb $32, %bl
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movl $0, %eax
-; i686-NEXT:    movl $0, %ecx
+; i686-NEXT:    movb $64, %dl
+; i686-NEXT:    movb $64, %ch
+; i686-NEXT:    subb %al, %ch
+; i686-NEXT:    movl %ebp, %eax
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    shll %cl, %eax
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    movl $0, %esi
 ; i686-NEXT:    jne .LBB8_2
 ; i686-NEXT:  # %bb.1: # %entry
-; i686-NEXT:    movl %esi, %eax
-; i686-NEXT:    movl %ebp, %ecx
+; i686-NEXT:    movl %eax, %esi
 ; i686-NEXT:  .LBB8_2: # %entry
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %edi, %eax
-; i686-NEXT:    movl %ebx, %ecx
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    shldl %cl, %edi, %eax
-; i686-NEXT:    testb $32, %bl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    shll %cl, %eax
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    movl $0, %esi
 ; i686-NEXT:    jne .LBB8_4
 ; i686-NEXT:  # %bb.3: # %entry
 ; i686-NEXT:    movl %eax, %esi
 ; i686-NEXT:  .LBB8_4: # %entry
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    shrl %cl, %edi
+; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; i686-NEXT:    movl $0, %edi
+; i686-NEXT:    jne .LBB8_6
+; i686-NEXT:  # %bb.5: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; i686-NEXT:  .LBB8_6: # %entry
+; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movb $64, %cl
-; i686-NEXT:    subb %bl, %cl
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movl %edi, %esi
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    subb %al, %dl
+; i686-NEXT:    movl %ebx, %esi
+; i686-NEXT:    movb %dl, %cl
 ; i686-NEXT:    shrl %cl, %esi
+; i686-NEXT:    testb $32, %dl
+; i686-NEXT:    movl $0, %eax
+; i686-NEXT:    jne .LBB8_8
+; i686-NEXT:  # %bb.7: # %entry
+; i686-NEXT:    movl %esi, %eax
+; i686-NEXT:  .LBB8_8: # %entry
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    movb %ch, %cl
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; i686-NEXT:    shrdl %cl, %edi, %eax
-; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    jne .LBB8_5
-; i686-NEXT:  # %bb.6: # %entry
+; i686-NEXT:    shrdl %cl, %eax, %esi
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    subl $64, %eax
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    sbbl $0, %edi
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    sbbl $0, %edi
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    sbbl $0, %edi
+; i686-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    jne .LBB8_10
+; i686-NEXT:  # %bb.9: # %entry
 ; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    jmp .LBB8_7
-; i686-NEXT:  .LBB8_5:
-; i686-NEXT:    movl %esi, %eax
-; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; i686-NEXT:  .LBB8_7: # %entry
+; i686-NEXT:  .LBB8_10: # %entry
+; i686-NEXT:    movl %edi, %esi
+; i686-NEXT:    movb %ch, %cl
+; i686-NEXT:    shldl %cl, %ebp, %esi
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    jne .LBB8_12
+; i686-NEXT:  # %bb.11: # %entry
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB8_12: # %entry
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    movb %ch, %cl
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movl %ebx, %ecx
+; i686-NEXT:    shldl %cl, %edi, %esi
+; i686-NEXT:    testb $32, %ch
+; i686-NEXT:    jne .LBB8_14
+; i686-NEXT:  # %bb.13: # %entry
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB8_14: # %entry
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    shldl %cl, %esi, %edi
-; i686-NEXT:    testb $32, %bl
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    jne .LBB8_9
-; i686-NEXT:  # %bb.8: # %entry
-; i686-NEXT:    movl %edi, %ebp
-; i686-NEXT:  .LBB8_9: # %entry
-; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl %ecx, %ebp
 ; i686-NEXT:    movl %edx, %ecx
-; i686-NEXT:    shll %cl, %ebp
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; i686-NEXT:    shll %cl, %esi
 ; i686-NEXT:    testb $32, %dl
-; i686-NEXT:    movl $0, %edi
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; i686-NEXT:    movl $0, %ecx
-; i686-NEXT:    jne .LBB8_11
-; i686-NEXT:  # %bb.10: # %entry
-; i686-NEXT:    movl %esi, %edi
-; i686-NEXT:    movl %ebp, %ecx
-; i686-NEXT:  .LBB8_11: # %entry
-; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    jne .LBB8_16
+; i686-NEXT:  # %bb.15: # %entry
+; i686-NEXT:    movl %esi, %ecx
+; i686-NEXT:  .LBB8_16: # %entry
 ; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %ebp, %esi
 ; i686-NEXT:    movl %edx, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    shldl %cl, %ebx, %edi
+; i686-NEXT:    shll %cl, %esi
 ; i686-NEXT:    testb $32, %dl
-; i686-NEXT:    jne .LBB8_13
-; i686-NEXT:  # %bb.12: # %entry
-; i686-NEXT:    movl %edi, %ebp
-; i686-NEXT:  .LBB8_13: # %entry
-; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movb $64, %cl
-; i686-NEXT:    subb %dl, %cl
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    shrl %cl, %ebx
-; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movb %cl, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Spill
-; i686-NEXT:    testb $32, %cl
 ; i686-NEXT:    movl $0, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    jne .LBB8_15
-; i686-NEXT:  # %bb.14: # %entry
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:  .LBB8_15: # %entry
+; i686-NEXT:    jne .LBB8_18
+; i686-NEXT:  # %bb.17: # %entry
+; i686-NEXT:    movl %esi, %ecx
+; i686-NEXT:  .LBB8_18: # %entry
 ; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    movl %edx, %ecx
+; i686-NEXT:    shrdl %cl, %ebx, %edi
+; i686-NEXT:    testb $32, %dl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    jne .LBB8_20
+; i686-NEXT:  # %bb.19: # %entry
 ; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB8_20: # %entry
+; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:    movl %ecx, %edi
 ; i686-NEXT:    movl %edx, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; i686-NEXT:    shldl %cl, %ebp, %edi
 ; i686-NEXT:    testb $32, %dl
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    jne .LBB8_17
-; i686-NEXT:  # %bb.16: # %entry
+; i686-NEXT:    jne .LBB8_22
+; i686-NEXT:  # %bb.21: # %entry
 ; i686-NEXT:    movl %edi, %esi
-; i686-NEXT:  .LBB8_17: # %entry
-; i686-NEXT:    orl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; i686-NEXT:    movl %ebx, %eax
-; i686-NEXT:    subl $64, %eax
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    sbbl $0, %ecx
-; i686-NEXT:    movl %ebp, %ecx
-; i686-NEXT:    sbbl $0, %ecx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    sbbl $0, %ecx
-; i686-NEXT:    setb {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Spill
-; i686-NEXT:    jb .LBB8_19
-; i686-NEXT:  # %bb.18: # %entry
-; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; i686-NEXT:  .LBB8_19: # %entry
-; i686-NEXT:    jb .LBB8_21
-; i686-NEXT:  # %bb.20: # %entry
-; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
-; i686-NEXT:  .LBB8_21: # %entry
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
-; i686-NEXT:    movl %ebp, %ebx
+; i686-NEXT:  .LBB8_22: # %entry
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    movl %eax, %edi
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    shll %cl, %ebx
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    shldl %cl, %ebp, %edi
-; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    movl %ebx, %ecx
-; i686-NEXT:    jne .LBB8_23
-; i686-NEXT:  # %bb.22: # %entry
-; i686-NEXT:    movl %edi, %ecx
-; i686-NEXT:  .LBB8_23: # %entry
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movl %eax, %ecx
 ; i686-NEXT:    shll %cl, %edi
+; i686-NEXT:    movl %ebx, %ebp
+; i686-NEXT:    shldl %cl, %eax, %ebp
+; i686-NEXT:    testb $32, %cl
 ; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    testb $32, %al
-; i686-NEXT:    movl $0, %edi
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; i686-NEXT:    jne .LBB8_25
-; i686-NEXT:  # %bb.24: # %entry
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
-; i686-NEXT:  .LBB8_25: # %entry
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; i686-NEXT:    jne .LBB8_27
-; i686-NEXT:  # %bb.26: # %entry
+; i686-NEXT:    jne .LBB8_24
+; i686-NEXT:  # %bb.23: # %entry
+; i686-NEXT:    movl %ebp, %edi
+; i686-NEXT:  .LBB8_24: # %entry
 ; i686-NEXT:    movl %edi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB8_27: # %entry
-; i686-NEXT:    movl %eax, %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; i686-NEXT:    movl %edx, %ecx
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; i686-NEXT:    shldl %cl, %edi, %esi
-; i686-NEXT:    testb $32, %al
-; i686-NEXT:    jne .LBB8_29
-; i686-NEXT:  # %bb.28: # %entry
+; i686-NEXT:    shldl %cl, %edi, %ebx
+; i686-NEXT:    testb $32, %dl
+; i686-NEXT:    jne .LBB8_26
+; i686-NEXT:  # %bb.25: # %entry
+; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB8_26: # %entry
 ; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB8_29: # %entry
-; i686-NEXT:    cmpb $0, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; i686-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 1-byte Folded Reload
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    subl $64, %ecx
+; i686-NEXT:    movl %ebp, %edx
+; i686-NEXT:    sbbl $0, %edx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; i686-NEXT:    sbbl $0, %edx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; i686-NEXT:    sbbl $0, %edx
+; i686-NEXT:    setae %dl
+; i686-NEXT:    jb .LBB8_28
+; i686-NEXT:  # %bb.27: # %entry
+; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; i686-NEXT:  .LBB8_28: # %entry
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; i686-NEXT:    shll %cl, %esi
+; i686-NEXT:    testb $32, %cl
+; i686-NEXT:    movl $0, %ebx
 ; i686-NEXT:    jne .LBB8_30
-; i686-NEXT:  # %bb.31: # %entry
-; i686-NEXT:    testb %al, %al
-; i686-NEXT:    je .LBB8_32
-; i686-NEXT:  .LBB8_33: # %entry
-; i686-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; i686-NEXT:    jne .LBB8_35
-; i686-NEXT:  .LBB8_34: # %entry
+; i686-NEXT:  # %bb.29: # %entry
+; i686-NEXT:    movl %esi, %ebx
+; i686-NEXT:  .LBB8_30: # %entry
+; i686-NEXT:    testb %dl, %dl
+; i686-NEXT:    jne .LBB8_32
+; i686-NEXT:  # %bb.31:
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Folded Reload
+; i686-NEXT:  .LBB8_32: # %entry
 ; i686-NEXT:    movl %ebx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB8_35: # %entry
-; i686-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
 ; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    shrdl %cl, %ebx, %esi
+; i686-NEXT:    shldl %cl, %ebp, %ebx
 ; i686-NEXT:    testb $32, %cl
-; i686-NEXT:    jne .LBB8_37
+; i686-NEXT:    jne .LBB8_34
+; i686-NEXT:  # %bb.33: # %entry
+; i686-NEXT:    movl %ebx, %esi
+; i686-NEXT:  .LBB8_34: # %entry
+; i686-NEXT:    testb %dl, %dl
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebp
+; i686-NEXT:    movzbl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 1-byte Folded Reload
+; i686-NEXT:    je .LBB8_35
 ; i686-NEXT:  # %bb.36: # %entry
-; i686-NEXT:    movl %esi, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB8_37: # %entry
-; i686-NEXT:    testb %al, %al
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    jne .LBB8_38
-; i686-NEXT:  # %bb.39: # %entry
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; i686-NEXT:    testb %al, %al
-; i686-NEXT:    jne .LBB8_41
-; i686-NEXT:    jmp .LBB8_42
-; i686-NEXT:  .LBB8_30:
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    orl %ebp, %ecx
-; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:    testb %al, %al
-; i686-NEXT:    jne .LBB8_33
-; i686-NEXT:  .LBB8_32: # %entry
 ; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; i686-NEXT:    testb %cl, %cl
+; i686-NEXT:    je .LBB8_38
+; i686-NEXT:    jmp .LBB8_39
+; i686-NEXT:  .LBB8_35:
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Reload
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %esi # 4-byte Folded Reload
+; i686-NEXT:    testb %cl, %cl
+; i686-NEXT:    jne .LBB8_39
+; i686-NEXT:  .LBB8_38: # %entry
 ; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; i686-NEXT:  .LBB8_39: # %entry
 ; i686-NEXT:    testb $32, {{[-0-9]+}}(%e{{[sb]}}p) # 1-byte Folded Reload
-; i686-NEXT:    je .LBB8_34
-; i686-NEXT:    jmp .LBB8_35
-; i686-NEXT:  .LBB8_38:
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Reload
-; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ebp # 4-byte Folded Reload
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; i686-NEXT:    testb %al, %al
-; i686-NEXT:    je .LBB8_42
-; i686-NEXT:  .LBB8_41:
+; i686-NEXT:    movl $0, %eax
+; i686-NEXT:    jne .LBB8_41
+; i686-NEXT:  # %bb.40: # %entry
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
-; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB8_42: # %entry
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; i686-NEXT:    orl {{[0-9]+}}(%esp), %eax
-; i686-NEXT:    orl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    orl %eax, %edx
-; i686-NEXT:    je .LBB8_44
-; i686-NEXT:  # %bb.43: # %entry
-; i686-NEXT:    movl %ebp, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB8_41: # %entry
+; i686-NEXT:    testb %cl, %cl
+; i686-NEXT:    je .LBB8_43
+; i686-NEXT:  # %bb.42:
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
-; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
-; i686-NEXT:  .LBB8_44: # %entry
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; i686-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    orl {{[0-9]+}}(%esp), %edx
-; i686-NEXT:    orl {{[0-9]+}}(%esp), %ebx
-; i686-NEXT:    orl %edx, %ebx
-; i686-NEXT:    je .LBB8_46
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Folded Reload
+; i686-NEXT:  .LBB8_43: # %entry
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    testb %cl, %cl
+; i686-NEXT:    jne .LBB8_44
 ; i686-NEXT:  # %bb.45: # %entry
+; i686-NEXT:    movl $0, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Folded Spill
+; i686-NEXT:    jmp .LBB8_46
+; i686-NEXT:  .LBB8_44:
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edi # 4-byte Reload
+; i686-NEXT:    orl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Folded Reload
+; i686-NEXT:    movl %ecx, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
 ; i686-NEXT:  .LBB8_46: # %entry
-; i686-NEXT:    movl %esi, 20(%eax)
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
-; i686-NEXT:    movl %edx, 16(%eax)
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ebx # 4-byte Reload
+; i686-NEXT:    orl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    orl %edi, %ecx
+; i686-NEXT:    je .LBB8_48
+; i686-NEXT:  # %bb.47: # %entry
+; i686-NEXT:    movl %eax, %ebp
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %eax # 4-byte Reload
+; i686-NEXT:    movl %eax, {{[-0-9]+}}(%e{{[sb]}}p) # 4-byte Spill
+; i686-NEXT:  .LBB8_48: # %entry
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    orl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    orl %edi, %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; i686-NEXT:    movl {{[0-9]+}}(%esp), %edi
+; i686-NEXT:    je .LBB8_50
+; i686-NEXT:  # %bb.49: # %entry
+; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
+; i686-NEXT:    movl %esi, %edi
+; i686-NEXT:  .LBB8_50: # %entry
+; i686-NEXT:    movl %edx, 20(%eax)
+; i686-NEXT:    movl %ebx, 16(%eax)
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
 ; i686-NEXT:    movl %edx, 4(%eax)
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %edx # 4-byte Reload
@@ -1314,9 +1402,8 @@
 ; i686-NEXT:    movl %ecx, 24(%eax)
 ; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
 ; i686-NEXT:    movl %ecx, 12(%eax)
-; i686-NEXT:    movl {{[-0-9]+}}(%e{{[sb]}}p), %ecx # 4-byte Reload
-; i686-NEXT:    movl %ecx, 8(%eax)
-; i686-NEXT:    addl $72, %esp
+; i686-NEXT:    movl %ebp, 8(%eax)
+; i686-NEXT:    addl $68, %esp
 ; i686-NEXT:    popl %esi
 ; i686-NEXT:    popl %edi
 ; i686-NEXT:    popl %ebx