diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -4802,6 +4802,17 @@
       EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond,
       DAGCombinerInfo &DCI, const SDLoc &DL) const;
 
+  // Simplify a SETCC that tests a shifted value for equality/inequality with
+  // zero by removing the redundant operations generated during the shift's
+  // expansion. Shift expansion (when the result is fed into a SETCC eq/ne 0)
+  // generates a tree consisting of ORs and multiple SRL/SHL nodes (which may
+  // be combined into FSHL/FSHR). When such a pair of shifts is applied to the
+  // same operand, it performs a rotation, which can be eliminated as long as
+  // the overall result is only compared with zero.
+  SDValue optimizeSetCCOfExpandedShift(EVT SCCVT, SDValue N0, SDValue N1C,
+                                       ISD::CondCode Cond, DAGCombinerInfo &DCI,
+                                       const SDLoc &DL) const;
+
   SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode,
                             SDValue CompTargetNode, ISD::CondCode Cond,
                             DAGCombinerInfo &DCI, const SDLoc &DL,
diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -3662,6 +3662,150 @@
   return T2;
 }
 
+namespace {
+/// Helper class for optimizeSetCCOfExpandedShift.
+/// Scans an expression tree consisting of ORs and shifts to find and replace
+/// shift pairs that perform a rotation with the rotation's operand.
+class ExpandedShiftsSimplifier {
+  struct ShiftInfo {
+    APInt Bits;
+    bool IsLeft;
+  };
+  SmallDenseMap<SDValue, ShiftInfo, 2> UnmatchedShifts;
+  SmallVector<SDValue, 4> Result;
+  unsigned MatchedShiftsCount = 0;
+
+  /// Match pairs of shifts applied to the same operand that effectively
+  /// perform its rotation:
+  /// 1) create a new entry in the UnmatchedShifts map if Op is observed
+  ///    for the first time;
+  /// 2) if the UnmatchedShifts map already contains an entry for Op, check
+  ///    that it was created for a shift in the opposite direction and that
+  ///    the two shift amounts sum up to Op's size in bits.
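+  /// For example, for a 64-bit operand Op, (srl Op, 17) and (shl Op, 47)
+  /// match because 17 + 47 == 64: together the pair covers all bits of Op,
+  /// i.e. it is a rotation of Op, so for an eq/ne-0 test it can be replaced
+  /// by Op itself.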
+  bool matchShifts(SDValue Op, const APInt &C, bool IsLeft) {
+    ShiftInfo &Info = UnmatchedShifts[Op];
+    if (Info.Bits.isZero()) {
+      Info.Bits = C;
+      Info.IsLeft = IsLeft;
+      return true;
+    }
+    if (Info.IsLeft == IsLeft)
+      return false;
+    if (Info.Bits + C != Op.getValueSizeInBits())
+      return false;
+    Result.push_back(Op);
+    UnmatchedShifts.erase(Op);
+    ++MatchedShiftsCount;
+    return true;
+  }
+
+  /// Recursively scan the DAG to match all shifts while the following
+  /// conditions are met:
+  /// 1) every node has only one use;
+  /// 2) every shift is either the first shift found for its operand or
+  ///    there is a previously found matching shift in the opposite
+  ///    direction;
+  /// 3) Depth stays below SelectionDAG::MaxRecursionDepth.
+  bool scan(SDValue Value, unsigned Depth = 0) {
+    if (Depth >= SelectionDAG::MaxRecursionDepth || !Value->hasOneUse())
+      return false;
+    unsigned Opcode = Value->getOpcode();
+    bool IsShiftLeft = Opcode == ISD::SHL || Opcode == ISD::FSHL;
+    ConstantSDNode *C;
+
+    if (Opcode == ISD::OR)
+      return scan(Value->getOperand(0), Depth + 1) &&
+             scan(Value->getOperand(1), Depth + 1);
+    if (Opcode == ISD::SRL || Opcode == ISD::SHL) {
+      if (!(C = dyn_cast<ConstantSDNode>(Value->getOperand(1))))
+        return false;
+      SDValue Op = Value->getOperand(0);
+      return matchShifts(Op, C->getAPIntValue(), IsShiftLeft);
+    }
+    if (Opcode == ISD::FSHL || Opcode == ISD::FSHR) {
+      if (!(C = dyn_cast<ConstantSDNode>(Value->getOperand(2))))
+        return false;
+      SDValue Op1 = Value->getOperand(0);
+      SDValue Op2 = Value->getOperand(1);
+      const APInt &CVal = C->getAPIntValue();
+      // For funnel shifts, the second operand is effectively shifted in the
+      // opposite direction.
+      return matchShifts(Op1, CVal, IsShiftLeft) &&
+             matchShifts(Op2, Value.getValueSizeInBits() - CVal, !IsShiftLeft);
+    }
+    Result.push_back(Value);
+    return true;
+  }
+
+public:
+  SDValue simplify(SDValue Value, SelectionDAG &DAG, const SDLoc &DL) {
+    assert(Result.empty() && UnmatchedShifts.empty() &&
+           MatchedShiftsCount == 0 && "simplify may only be called once");
+
+    if (!scan(Value))
+      return SDValue();
+    // There should be at most one unmatched shift and at least one pair
+    // of matched shifts.
+    if (MatchedShiftsCount == 0 || UnmatchedShifts.size() > 1 || Result.empty())
+      return SDValue();
+    // Recreate the value for the unmatched shift.
+    if (!UnmatchedShifts.empty()) {
+      DenseMap<SDValue, ShiftInfo>::iterator UnmatchedShift =
+          UnmatchedShifts.begin();
+      SDValue Op = UnmatchedShift->first;
+      ShiftInfo &Info = UnmatchedShift->second;
+      EVT ShiftTy =
+          EVT::getIntegerVT(*DAG.getContext(), Info.Bits.getBitWidth());
+      SDValue Con = DAG.getConstant(Info.Bits, DL, ShiftTy);
+      unsigned ShiftOpcode = Info.IsLeft ? ISD::SHL : ISD::SRL;
+      Result.push_back(
+          DAG.getNode(ShiftOpcode, DL, Value.getValueType(), Op, Con));
+    }
+    // Reduce all values using OR: combine pairs of values from the Result
+    // list and push each new OR back onto it, generating a balanced tree
+    // that shortens the critical path.
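+    // For example, with Result = [A, B, C, D] the loop below appends
+    // (or A, B) and (or C, D), and finally ORs those two partial results
+    // together, giving a tree of depth 2 instead of a chain of depth 3.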
+    for (size_t Index = 0; Index + 1 < Result.size(); Index += 2) {
+      SDValue NewOr = DAG.getNode(ISD::OR, DL, Value.getValueType(),
+                                  Result[Index], Result[Index + 1]);
+      Result.push_back(NewOr);
+    }
+    return Result.back();
+  }
+};
+
+} // end anonymous namespace
+
+// Examples of redundant shift elimination (where C0 + C1 equals the bit
+// width of the shifted operand):
+// (or (or (srl X, C0), (shl Y, C1)), (srl Y, C0)) ==/!= 0
+//   --> (or (srl X, C0), Y) ==/!= 0
+//
+// (or (or (srl Y, C0), (shl X, C1)), (shl Y, C1)) ==/!= 0
+//   --> (or (shl X, C1), Y) ==/!= 0
+//
+// (or (srl X, C), (fshr X, Y, C)) ==/!= 0 --> (or (srl Y, C), X) ==/!= 0
+//
+// (or (or (fshl W, X, C), (fshl X, Y, C)),
+//     (or (fshl Y, Z, C), (shl Z, C))) ==/!= 0
+//   --> (or (or (shl W, C), X), (or Y, Z)) ==/!= 0
+SDValue TargetLowering::optimizeSetCCOfExpandedShift(EVT SCCVT, SDValue N0,
+                                                     SDValue N1C,
+                                                     ISD::CondCode Cond,
+                                                     DAGCombinerInfo &DCI,
+                                                     const SDLoc &DL) const {
+  assert(isNullOrNullSplat(N1C) && "Should be a comparison with 0.");
+  assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && "Unexpected condcode");
+
+  if (N0.getValueType().isVector())
+    return SDValue();
+
+  SelectionDAG &DAG = DCI.DAG;
+  ExpandedShiftsSimplifier Matcher;
+  if (SDValue ReducedTree = Matcher.simplify(N0, DAG, DL))
+    return DAG.getSetCC(DL, SCCVT, ReducedTree, N1C, Cond);
+
+  return SDValue();
+}
+
 /// Try to fold an equality comparison with a {add/sub/xor} binary operation as
 /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to
 /// handle the commuted versions of these patterns.
@@ -4346,6 +4490,12 @@
         }
       }
     }
+    // Try to simplify an expanded shift by removing the shift operations
+    // that effectively perform a rotation.
+    if (CmpZero)
+      if (SDValue CC =
+              optimizeSetCCOfExpandedShift(VT, N0, N1, Cond, DCI, dl))
+        return CC;
   }
 
   // If we have "setcc X, C0", check to see if we can shrink the immediate
diff --git a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
--- a/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/AArch64/icmp-shift-opt.ll
@@ -12,8 +12,7 @@
 ; CHECK-NEXT:    // =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    adds x0, x0, #1
 ; CHECK-NEXT:    adcs x1, x1, xzr
-; CHECK-NEXT:    extr x8, x1, x0, #60
-; CHECK-NEXT:    orr x8, x8, x1, lsr #60
+; CHECK-NEXT:    orr x8, x1, x0, lsr #60
 ; CHECK-NEXT:    cbnz x8, .LBB0_1
 ; CHECK-NEXT:  // %bb.2: // %exit
 ; CHECK-NEXT:    ret
@@ -32,8 +31,7 @@
 define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_srl_eq_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x1, x0, #17
-; CHECK-NEXT:    orr x8, x8, x1, lsr #17
+; CHECK-NEXT:    orr x8, x1, x0, lsr #17
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -45,8 +43,7 @@
 define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_srl_ne_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x1, x0, #17
-; CHECK-NEXT:    orr x8, x8, x1, lsr #17
+; CHECK-NEXT:    orr x8, x1, x0, lsr #17
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -58,8 +55,7 @@
 define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_eq_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x1, x0, #47
-; CHECK-NEXT:    orr x8, x8, x0, lsl #17
+; CHECK-NEXT:    orr x8, x0, x1, lsl #17
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
@@ -71,8 +67,7 @@
 define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_ne_zero:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x1, x0, #47
-; CHECK-NEXT:    orr x8, x8, x0, lsl #17
+; CHECK-NEXT:    orr x8, x0, x1, lsl #17
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, ne
 ; CHECK-NEXT:    ret
@@ -106,8 +101,7 @@
 define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
 ; CHECK:       // %bb.0:
-; CHECK-NEXT:    extr x8, x0, x1, #47
-; CHECK-NEXT:    orr x8, x8, x1, lsl #17
+; CHECK-NEXT:    orr x8, x1, x0, lsl #17
 ; CHECK-NEXT:    cmp x8, #0
 ; CHECK-NEXT:    cset w0, eq
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
--- a/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
+++ b/llvm/test/CodeGen/ARM/consthoist-icmpimm.ll
@@ -630,14 +630,10 @@
 ; CHECKV7M-NEXT:    ldrd lr, r0, [sp, #8]
 ; CHECKV7M-NEXT:    beq .LBB6_2
 ; CHECKV7M-NEXT:  @ %bb.1: @ %then
-; CHECKV7M-NEXT:    lsrs r2, r2, #17
-; CHECKV7M-NEXT:    orr.w r2, r2, r3, lsl #15
-; CHECKV7M-NEXT:    orr.w r2, r2, r3, lsr #17
-; CHECKV7M-NEXT:    lsr.w r3, r12, #17
-; CHECKV7M-NEXT:    orr.w r3, r3, r1, lsl #15
+; CHECKV7M-NEXT:    orr.w r2, r3, r2, lsr #17
+; CHECKV7M-NEXT:    orr.w r1, r1, r12, lsr #17
 ; CHECKV7M-NEXT:    cmp r2, #0
 ; CHECKV7M-NEXT:    mov r2, r0
-; CHECKV7M-NEXT:    orr.w r1, r3, r1, lsr #17
 ; CHECKV7M-NEXT:    it ne
 ; CHECKV7M-NEXT:    movne r2, lr
 ; CHECKV7M-NEXT:    cmp r1, #0
@@ -646,9 +642,7 @@
 ; CHECKV7M-NEXT:    add r0, r2
 ; CHECKV7M-NEXT:    pop {r7, pc}
 ; CHECKV7M-NEXT:  .LBB6_2: @ %else
-; CHECKV7M-NEXT:    lsrs r1, r2, #17
-; CHECKV7M-NEXT:    orr.w r1, r1, r3, lsl #15
-; CHECKV7M-NEXT:    orr.w r1, r1, r3, lsr #17
+; CHECKV7M-NEXT:    orr.w r1, r3, r2, lsr #17
 ; CHECKV7M-NEXT:    cmp r1, #0
 ; CHECKV7M-NEXT:    it ne
 ; CHECKV7M-NEXT:    movne r0, lr
@@ -664,14 +658,10 @@
 ; CHECKV7A-NEXT:    lsls r4, r4, #31
 ; CHECKV7A-NEXT:    beq .LBB6_2
 ; CHECKV7A-NEXT:  @ %bb.1: @ %then
-; CHECKV7A-NEXT:    lsrs r2, r2, #17
-; CHECKV7A-NEXT:    orr.w r2, r2, r3, lsl #15
-; CHECKV7A-NEXT:    orr.w r2, r2, r3, lsr #17
-; CHECKV7A-NEXT:    lsr.w r3, r12, #17
-; CHECKV7A-NEXT:    orr.w r3, r3, r1, lsl #15
+; CHECKV7A-NEXT:    orr.w r2, r3, r2, lsr #17
+; CHECKV7A-NEXT:    orr.w r1, r1, r12, lsr #17
 ; CHECKV7A-NEXT:    cmp r2, #0
 ; CHECKV7A-NEXT:    mov r2, r0
-; CHECKV7A-NEXT:    orr.w r1, r3, r1, lsr #17
 ; CHECKV7A-NEXT:    it ne
 ; CHECKV7A-NEXT:    movne r2, lr
 ; CHECKV7A-NEXT:    cmp r1, #0
@@ -680,9 +670,7 @@
 ; CHECKV7A-NEXT:    add r0, r2
 ; CHECKV7A-NEXT:    pop {r4, pc}
 ; CHECKV7A-NEXT:  .LBB6_2: @ %else
-; CHECKV7A-NEXT:    lsrs r1, r2, #17
-; CHECKV7A-NEXT:    orr.w r1, r1, r3, lsl #15
-; CHECKV7A-NEXT:    orr.w r1, r1, r3, lsr #17
+; CHECKV7A-NEXT:    orr.w r1, r3, r2, lsr #17
 ; CHECKV7A-NEXT:    cmp r1, #0
 ; CHECKV7A-NEXT:    it ne
 ; CHECKV7A-NEXT:    movne r0, lr
diff --git a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
--- a/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/ARM/icmp-shift-opt.ll
@@ -12,9 +12,7 @@
 ; CHECK-NEXT:  @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    adds r0, r0, #1
 ; CHECK-NEXT:    adc r1, r1, #0
-; CHECK-NEXT:    lsr r2, r0, #16
-; CHECK-NEXT:    orr r2, r2, r1, lsl #16
-; CHECK-NEXT:    orr r2, r2, r1, lsr #16
+; CHECK-NEXT:    orr r2, r1, r0, lsr #16
 ; CHECK-NEXT:    cmp r2, #0
 ; CHECK-NEXT:    bne .LBB0_1
 ; CHECK-NEXT:  @ %bb.2: @ %exit
@@ -34,9 +32,7 @@
 define i1 @opt_setcc_srl_eq_zero(i64 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_srl_eq_zero:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    lsr r0, r0, #17
-; CHECK-NEXT:    orr r0, r0, r1, lsl #15
-; CHECK-NEXT:    orr r0, r0, r1, lsr #17
+; CHECK-NEXT:    orr r0, r1, r0, lsr #17
 ; CHECK-NEXT:    clz r0, r0
 ; CHECK-NEXT:    lsr r0, r0, #5
 ; CHECK-NEXT:    bx lr
@@ -48,9 +44,7 @@
 define i1 @opt_setcc_srl_ne_zero(i64 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_srl_ne_zero:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    lsr r0, r0, #17
-; CHECK-NEXT:    orr r0, r0, r1, lsl #15
-; CHECK-NEXT:    orr r0, r0, r1, lsr #17
+; CHECK-NEXT:    orr r0, r1, r0, lsr #17
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    movwne r0, #1
 ; CHECK-NEXT:    bx lr
@@ -62,9 +56,7 @@
 define i1 @opt_setcc_shl_eq_zero(i64 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_eq_zero:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    lsl r1, r1, #17
-; CHECK-NEXT:    orr r1, r1, r0, lsr #15
-; CHECK-NEXT:    orr r0, r1, r0, lsl #17
+; CHECK-NEXT:    orr r0, r0, r1, lsl #17
 ; CHECK-NEXT:    clz r0, r0
 ; CHECK-NEXT:    lsr r0, r0, #5
 ; CHECK-NEXT:    bx lr
@@ -76,9 +68,7 @@
 define i1 @opt_setcc_shl_ne_zero(i64 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_ne_zero:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    lsl r1, r1, #17
-; CHECK-NEXT:    orr r1, r1, r0, lsr #15
-; CHECK-NEXT:    orr r0, r1, r0, lsl #17
+; CHECK-NEXT:    orr r0, r0, r1, lsl #17
 ; CHECK-NEXT:    cmp r0, #0
 ; CHECK-NEXT:    movwne r0, #1
 ; CHECK-NEXT:    bx lr
@@ -113,9 +103,7 @@
 define i1 @opt_setcc_expanded_shl_correct_shifts(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: opt_setcc_expanded_shl_correct_shifts:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    lsl r0, r0, #17
-; CHECK-NEXT:    orr r0, r0, r1, lsr #15
-; CHECK-NEXT:    orr r0, r0, r1, lsl #17
+; CHECK-NEXT:    orr r0, r1, r0, lsl #17
 ; CHECK-NEXT:    clz r0, r0
 ; CHECK-NEXT:    lsr r0, r0, #5
 ; CHECK-NEXT:    bx lr
@@ -151,15 +139,9 @@
 define i1 @opt_setcc_shl_ne_zero_i128(i128 %a) nounwind {
 ; CHECK-LABEL: opt_setcc_shl_ne_zero_i128:
 ; CHECK:       @ %bb.0:
-; CHECK-NEXT:    lsl r3, r3, #17
-; CHECK-NEXT:    orr r12, r3, r2, lsr #15
-; CHECK-NEXT:    lsl r3, r1, #17
-; CHECK-NEXT:    lsl r2, r2, #17
-; CHECK-NEXT:    orr r3, r3, r0, lsr #15
-; CHECK-NEXT:    orr r1, r2, r1, lsr #15
-; CHECK-NEXT:    orr r3, r3, r12
-; CHECK-NEXT:    orr r0, r1, r0, lsl #17
-; CHECK-NEXT:    orrs r0, r0, r3
+; CHECK-NEXT:    orr r2, r2, r3, lsl #17
+; CHECK-NEXT:    orr r0, r1, r0
+; CHECK-NEXT:    orrs r0, r0, r2
 ; CHECK-NEXT:    movwne r0, #1
 ; CHECK-NEXT:    bx lr
   %shl = shl i128 %a, 17
diff --git a/llvm/test/CodeGen/X86/icmp-shift-opt.ll b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
--- a/llvm/test/CodeGen/X86/icmp-shift-opt.ll
+++ b/llvm/test/CodeGen/X86/icmp-shift-opt.ll
@@ -13,34 +13,29 @@
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ebx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
 ; X86-NEXT:    .p2align 4, 0x90
 ; X86-NEXT:  .LBB0_1: # %loop
 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NEXT:    addl $1, %ecx
+; X86-NEXT:    addl $1, %edi
 ; X86-NEXT:    adcl $0, %esi
 ; X86-NEXT:    adcl $0, %edx
-; X86-NEXT:    adcl $0, %ebx
-; X86-NEXT:    movl %ebx, %edi
-; X86-NEXT:    shldl $4, %edx, %edi
-; X86-NEXT:    movl %edx, %ebp
-; X86-NEXT:    shldl $4, %esi, %ebp
-; X86-NEXT:    movl %ecx, %eax
-; X86-NEXT:    movl %ebx, %ecx
-; X86-NEXT:    shrl $28, %ecx
-; X86-NEXT:    orl %ebp, %ecx
-; X86-NEXT:    orl %edi, %ecx
-; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    adcl $0, %ecx
+; X86-NEXT:    movl %ecx, %ebx
+; X86-NEXT:    orl %edx, %ebx
+; X86-NEXT:    movl %esi, %ebp
+; X86-NEXT:    shrl $28, %ebp
+; X86-NEXT:    orl %ebx, %ebp
 ; X86-NEXT:    jne .LBB0_1
 ; X86-NEXT:  # %bb.2: # %exit
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %ecx, (%eax)
+; X86-NEXT:    movl %edi, (%eax)
 ; X86-NEXT:    movl %esi, 4(%eax)
 ; X86-NEXT:    movl %edx, 8(%eax)
-; X86-NEXT:    movl %ebx, 12(%eax)
+; X86-NEXT:    movl %ecx, 12(%eax)
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
@@ -56,11 +51,9 @@
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
 ; X64-NEXT:    addq $1, %rax
 ; X64-NEXT:    adcq $0, %rdx
-; X64-NEXT:    movq %rdx, %rcx
-; X64-NEXT:    shldq $4, %rax, %rcx
-; X64-NEXT:    movq %rdx, %rsi
-; X64-NEXT:    shrq $60, %rsi
-; X64-NEXT:    orq %rcx, %rsi
+; X64-NEXT:    movq %rax, %rcx
+; X64-NEXT:    shrq $60, %rcx
+; X64-NEXT:    orq %rdx, %rcx
 ; X64-NEXT:    jne .LBB0_1
 ; X64-NEXT:  # %bb.2: # %exit
 ; X64-NEXT:    retq
@@ -79,30 +72,19 @@
 define i1 @opt_setcc_srl_eq_zero(i128 %a) nounwind {
 ; X86-LABEL: opt_setcc_srl_eq_zero:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl %esi, %edi
-; X86-NEXT:    shldl $15, %edx, %edi
-; X86-NEXT:    shldl $15, %ecx, %edx
-; X86-NEXT:    shrdl $17, %ecx, %eax
-; X86-NEXT:    orl %edi, %eax
-; X86-NEXT:    shrl $17, %esi
-; X86-NEXT:    orl %edx, %esi
-; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shrl $17, %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl %ecx, %eax
 ; X86-NEXT:    sete %al
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_srl_eq_zero:
 ; X64:       # %bb.0:
-; X64-NEXT:    shrdq $17, %rsi, %rdi
-; X64-NEXT:    shrq $17, %rsi
-; X64-NEXT:    orq %rdi, %rsi
+; X64-NEXT:    shrq $17, %rdi
+; X64-NEXT:    orq %rsi, %rdi
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
   %srl = lshr i128 %a, 17
@@ -113,30 +95,19 @@
 define i1 @opt_setcc_srl_ne_zero(i128 %a) nounwind {
 ; X86-LABEL: opt_setcc_srl_ne_zero:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl %esi, %edi
-; X86-NEXT:    shldl $15, %edx, %edi
-; X86-NEXT:    shldl $15, %ecx, %edx
-; X86-NEXT:    shrdl $17, %ecx, %eax
-; X86-NEXT:    orl %edi, %eax
-; X86-NEXT:    shrl $17, %esi
-; X86-NEXT:    orl %edx, %esi
-; X86-NEXT:    orl %eax, %esi
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    shrl $17, %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl %ecx, %eax
 ; X86-NEXT:    setne %al
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_srl_ne_zero:
 ; X64:       # %bb.0:
-; X64-NEXT:    shrdq $17, %rsi, %rdi
-; X64-NEXT:    shrq $17, %rsi
-; X64-NEXT:    orq %rdi, %rsi
+; X64-NEXT:    shrq $17, %rdi
+; X64-NEXT:    orq %rsi, %rdi
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %srl = lshr i128 %a, 17
@@ -147,27 +118,19 @@
 define i1 @opt_setcc_shl_eq_zero(i128 %a) nounwind {
 ; X86-LABEL: opt_setcc_shl_eq_zero:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    shldl $17, %esi, %edx
-; X86-NEXT:    shldl $17, %ecx, %esi
-; X86-NEXT:    shldl $17, %eax, %ecx
-; X86-NEXT:    shll $17, %eax
-; X86-NEXT:    orl %esi, %eax
-; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $17, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    orl %eax, %ecx
 ; X86-NEXT:    sete %al
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_shl_eq_zero:
 ; X64:       # %bb.0:
-; X64-NEXT:    shldq $17, %rdi, %rsi
-; X64-NEXT:    shlq $17, %rdi
-; X64-NEXT:    orq %rsi, %rdi
+; X64-NEXT:    shlq $17, %rsi
+; X64-NEXT:    orq %rdi, %rsi
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
   %shl = shl i128 %a, 17
@@ -178,27 +141,19 @@
 define i1 @opt_setcc_shl_ne_zero(i128 %a) nounwind {
 ; X86-LABEL: opt_setcc_shl_ne_zero:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    shldl $17, %esi, %edx
-; X86-NEXT:    shldl $17, %ecx, %esi
-; X86-NEXT:    shldl $17, %eax, %ecx
-; X86-NEXT:    shll $17, %eax
-; X86-NEXT:    orl %esi, %eax
-; X86-NEXT:    orl %edx, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    shll $17, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    orl %eax, %ecx
 ; X86-NEXT:    setne %al
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_shl_ne_zero:
 ; X64:       # %bb.0:
-; X64-NEXT:    shldq $17, %rdi, %rsi
-; X64-NEXT:    shlq $17, %rdi
-; X64-NEXT:    orq %rsi, %rdi
+; X64-NEXT:    shlq $17, %rsi
+; X64-NEXT:    orq %rdi, %rsi
 ; X64-NEXT:    setne %al
 ; X64-NEXT:    retq
   %shl = shl i128 %a, 17
@@ -262,27 +217,19 @@
 define i1 @opt_setcc_expanded_shl_correct_shifts(i64 %a, i64 %b) nounwind {
 ; X86-LABEL: opt_setcc_expanded_shl_correct_shifts:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    shldl $17, %edx, %esi
-; X86-NEXT:    shldl $17, %ecx, %edx
-; X86-NEXT:    shldl $17, %eax, %ecx
-; X86-NEXT:    shll $17, %eax
-; X86-NEXT:    orl %edx, %eax
-; X86-NEXT:    orl %esi, %ecx
+; X86-NEXT:    shll $17, %ecx
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    orl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    orl %eax, %ecx
 ; X86-NEXT:    sete %al
-; X86-NEXT:    popl %esi
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: opt_setcc_expanded_shl_correct_shifts:
 ; X64:       # %bb.0:
-; X64-NEXT:    shldq $17, %rsi, %rdi
-; X64-NEXT:    shlq $17, %rsi
-; X64-NEXT:    orq %rdi, %rsi
+; X64-NEXT:    shlq $17, %rdi
+; X64-NEXT:    orq %rsi, %rdi
 ; X64-NEXT:    sete %al
 ; X64-NEXT:    retq
   %shl.a = shl i64 %a, 17