diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -3327,6 +3327,21 @@
     return true;
   }
 
+  /// Return true if it's profitable to replace
+  ///
+  ///   shift x, non-constant
+  ///
+  /// with two instances of
+  ///
+  ///   shift x, constant
+  ///
+  /// where `shift` is a shift or rotate operation of the given opcode.
+  virtual bool
+  shiftOrRotateIsFasterWithConstantShiftAmount(unsigned opcode,
+                                               CombineLevel level) const {
+    return false;
+  }
+
   // Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern
   // to a shuffle and a truncate.
   // Example of such a combine:
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -488,6 +488,9 @@
     SDValue reassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                            SDValue N1, SDNodeFlags Flags);
 
+    // SHL, SRA, SRL, ROTL, ROTR, but not FSHL or FSHR.
+    SDValue visitShiftOrRotate(SDNode *N);
+
     SDValue visitShiftByConstant(SDNode *N);
 
     SDValue foldSelectOfConstants(SDNode *N);
@@ -7120,6 +7123,32 @@
   return DAG.getNode(LogicOpcode, DL, VT, NewShift1, NewShift2);
 }
 
+SDValue DAGCombiner::visitShiftOrRotate(SDNode *N) {
+  auto ShiftOpcode = N->getOpcode();
+  SDValue LHS = N->getOperand(0);
+  SDValue RHS = N->getOperand(1);
+
+  // On some targets, shifting/rotating by a constant is faster than
+  // shifting/rotating by a register, so we fold:
+  //
+  //   shift lhs, (select cond, constant1, constant2) -->
+  //   select cond, (shift lhs, constant1), (shift lhs, constant2)
+  //
+  // TODO: This logic could be extended to ops other than shift/rotate.
+  if (RHS.getOpcode() == ISD::SELECT && RHS.hasOneUse() &&
+      isa<ConstantSDNode>(RHS.getOperand(1)) &&
+      isa<ConstantSDNode>(RHS.getOperand(2)) &&
+      TLI.shiftOrRotateIsFasterWithConstantShiftAmount(ShiftOpcode, Level)) {
+    SDLoc DL(N);
+    EVT VT = N->getValueType(0);
+    return DAG.getNode(
+        ISD::SELECT, DL, VT, RHS.getOperand(0),
+        DAG.getNode(ShiftOpcode, DL, VT, LHS, RHS.getOperand(1)),
+        DAG.getNode(ShiftOpcode, DL, VT, LHS, RHS.getOperand(2)));
+  }
+  return SDValue();
+}
+
 /// Handle transforms common to the three shifts, when the shift amount is a
 /// constant.
 /// We are looking for: (shift being one of shl/sra/srl)
@@ -7227,6 +7256,9 @@
   EVT VT = N->getValueType(0);
   unsigned Bitsize = VT.getScalarSizeInBits();
 
+  if (SDValue V = visitShiftOrRotate(N))
+    return V;
+
   // fold (rot x, 0) -> x
   if (isNullOrNullSplat(N1))
     return N0;
@@ -7284,6 +7316,9 @@
   if (SDValue V = DAG.simplifyShift(N0, N1))
     return V;
 
+  if (SDValue V = visitShiftOrRotate(N))
+    return V;
+
   EVT VT = N0.getValueType();
   EVT ShiftVT = N1.getValueType();
   unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -7534,6 +7569,9 @@
   if (SDValue V = DAG.simplifyShift(N0, N1))
     return V;
 
+  if (SDValue V = visitShiftOrRotate(N))
+    return V;
+
   EVT VT = N0.getValueType();
   unsigned OpSizeInBits = VT.getScalarSizeInBits();
@@ -7725,6 +7763,9 @@
   if (SDValue V = DAG.simplifyShift(N0, N1))
     return V;
 
+  if (SDValue V = visitShiftOrRotate(N))
+    return V;
+
   EVT VT = N0.getValueType();
   unsigned OpSizeInBits = VT.getScalarSizeInBits();
diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h
--- a/llvm/lib/Target/X86/X86ISelLowering.h
+++ b/llvm/lib/Target/X86/X86ISelLowering.h
@@ -777,6 +777,9 @@
     SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override;
 
+    bool shiftOrRotateIsFasterWithConstantShiftAmount(
+        unsigned Opcode, CombineLevel Level) const override;
+
     // Return true if it is profitable to combine a BUILD_VECTOR with a
     // stride-pattern to a shuffle and a truncate.
     // Example of such a combine:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45265,6 +45265,15 @@
   return true;
 }
 
+bool X86TargetLowering::shiftOrRotateIsFasterWithConstantShiftAmount(
+    unsigned Opcode, CombineLevel /*Level*/) const {
+  // On most x86 chips, shifts/rotates by a constant are faster than
+  // shifts/rotates by a register.
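+  // For example, `shll $3, %eax` encodes the shift amount as an immediate,
+  // whereas a variable amount must first be placed in %cl (`shll %cl, %eax`).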
+  assert(Opcode == ISD::SHL || Opcode == ISD::SRA || Opcode == ISD::SRL ||
+         Opcode == ISD::ROTL || Opcode == ISD::ROTR);
+  return true;
+}
+
 bool X86TargetLowering::
 isDesirableToCombineBuildVectorToShuffleTruncate(
     ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const {
diff --git a/llvm/test/CodeGen/X86/dagcombine-select.ll b/llvm/test/CodeGen/X86/dagcombine-select.ll
--- a/llvm/test/CodeGen/X86/dagcombine-select.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-select.ll
@@ -194,12 +194,10 @@
 define i32 @shl_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: shl_constant_sel_constants:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    andb $1, %cl
-; CHECK-NEXT:    xorb $3, %cl
-; CHECK-NEXT:    movl $1, %eax
-; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shll %cl, %eax
+; CHECK-NEXT:    notb %dil
+; CHECK-NEXT:    movzbl %dil, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    leal 4(,%rax,4), %eax
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, i32 2, i32 3
   %bo = shl i32 1, %sel
@@ -209,12 +207,9 @@
 define i32 @lshr_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: lshr_constant_sel_constants:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    andb $1, %cl
-; CHECK-NEXT:    xorb $3, %cl
-; CHECK-NEXT:    movl $64, %eax
-; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shrl %cl, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    leal 8(,%rdi,8), %eax
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, i32 2, i32 3
   %bo = lshr i32 64, %sel
@@ -224,12 +219,10 @@
 define i32 @ashr_constant_sel_constants(i1 %cond) {
 ; CHECK-LABEL: ashr_constant_sel_constants:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl %edi, %ecx
-; CHECK-NEXT:    andb $1, %cl
-; CHECK-NEXT:    xorb $3, %cl
-; CHECK-NEXT:    movl $128, %eax
-; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
-; CHECK-NEXT:    shrl %cl, %eax
+; CHECK-NEXT:    # kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    andl $1, %edi
+; CHECK-NEXT:    shll $4, %edi
+; CHECK-NEXT:    leal 16(%rdi), %eax
 ; CHECK-NEXT:    retq
   %sel = select i1 %cond, i32 2, i32 3
   %bo = ashr i32 128, %sel
diff --git a/llvm/test/CodeGen/X86/dagcombine-shifts.ll b/llvm/test/CodeGen/X86/dagcombine-shifts.ll
--- a/llvm/test/CodeGen/X86/dagcombine-shifts.ll
+++ b/llvm/test/CodeGen/X86/dagcombine-shifts.ll
@@ -215,3 +215,143 @@
 declare void @f(i64)
 
+; The *_select tests below check that we do the following transformation:
+;
+;   shift lhs, (select cond, constant1, constant2) -->
+;   select cond, (shift lhs, constant1), (shift lhs, constant2)
+;
+; When updating these testcases, ensure that there are two shift instructions
+; in the result and that they take immediates rather than registers.
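+;
+; As an illustrative sketch only (hand-written IR with made-up value names,
+; not FileCheck output), the fold rewrites
+;
+;   %amt = select i1 %cond, i32 3, i32 6
+;   %ret = lshr i32 %x, %amt
+;
+; into the equivalent of
+;
+;   %a = lshr i32 %x, 3
+;   %b = lshr i32 %x, 6
+;   %ret = select i1 %cond, i32 %a, i32 %b
+;
+; so that both shifts take immediate amounts.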
+define i32 @shl_select(i32 %x, i1 %cond) {
+; CHECK-LABEL: shl_select:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    shrl $3, %ecx
+; CHECK-NEXT:    shrl $6, %eax
+; CHECK-NEXT:    testb $1, %sil
+; CHECK-NEXT:    cmovnel %ecx, %eax
+; CHECK-NEXT:    retq
+  %shift_amnt = select i1 %cond, i32 3, i32 6
+  %ret = lshr i32 %x, %shift_amnt
+  ret i32 %ret
+}
+
+define i32 @ashr_select(i32 %x, i1 %cond) {
+; CHECK-LABEL: ashr_select:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    sarl $3, %ecx
+; CHECK-NEXT:    sarl $6, %eax
+; CHECK-NEXT:    testb $1, %sil
+; CHECK-NEXT:    cmovnel %ecx, %eax
+; CHECK-NEXT:    retq
+  %shift_amnt = select i1 %cond, i32 3, i32 6
+  %ret = ashr i32 %x, %shift_amnt
+  ret i32 %ret
+}
+
+define i32 @lshr_select(i32 %x, i1 %cond) {
+; CHECK-LABEL: lshr_select:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    shrl $3, %ecx
+; CHECK-NEXT:    shrl $6, %eax
+; CHECK-NEXT:    testb $1, %sil
+; CHECK-NEXT:    cmovnel %ecx, %eax
+; CHECK-NEXT:    retq
+  %shift_amnt = select i1 %cond, i32 3, i32 6
+  %ret = lshr i32 %x, %shift_amnt
+  ret i32 %ret
+}
+
+; Check that we don't perform the folding described in shl_select when the
+; shift amount is used other than as an input to the shift instruction.
+;
+; When updating this testcase, check that there's exactly one shrl instruction
+; generated.
+declare void @i32_foo(i32)
+define i32 @shl_select_not_folded_if_shift_amnt_is_used(i32 %x, i1 %cond) {
+; CHECK-LABEL: shl_select_not_folded_if_shift_amnt_is_used:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    pushq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    pushq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    pushq %rax
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    .cfi_offset %rbx, -24
+; CHECK-NEXT:    .cfi_offset %rbp, -16
+; CHECK-NEXT:    movl %edi, %ebx
+; CHECK-NEXT:    notb %sil
+; CHECK-NEXT:    movzbl %sil, %eax
+; CHECK-NEXT:    andl $1, %eax
+; CHECK-NEXT:    leal 3(%rax,%rax,2), %ebp
+; CHECK-NEXT:    movl %ebp, %edi
+; CHECK-NEXT:    callq i32_foo
+; CHECK-NEXT:    movl %ebp, %ecx
+; CHECK-NEXT:    shrl %cl, %ebx
+; CHECK-NEXT:    movl %ebx, %eax
+; CHECK-NEXT:    addq $8, %rsp
+; CHECK-NEXT:    .cfi_def_cfa_offset 24
+; CHECK-NEXT:    popq %rbx
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    popq %rbp
+; CHECK-NEXT:    .cfi_def_cfa_offset 8
+; CHECK-NEXT:    retq
+  %shift_amnt = select i1 %cond, i32 3, i32 6
+  call void @i32_foo(i32 %shift_amnt)
+  %ret = lshr i32 %x, %shift_amnt
+  ret i32 %ret
+}
+
+; Check that we don't perform the folding described in shl_select when one of
+; the shift amounts is not a constant.
+;
+; When updating these testcases, check that there's exactly one shrl
+; instruction generated in each.
+define i32 @shl_select_not_folded_if_shift_amnt_is_nonconstant_1(i32 %x, i32 %a, i1 %cond) {
+; CHECK-LABEL: shl_select_not_folded_if_shift_amnt_is_nonconstant_1:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    testb $1, %dl
+; CHECK-NEXT:    movl $6, %ecx
+; CHECK-NEXT:    cmovnel %esi, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrl %cl, %eax
+; CHECK-NEXT:    retq
+  %shift_amnt = select i1 %cond, i32 %a, i32 6
+  %ret = lshr i32 %x, %shift_amnt
+  ret i32 %ret
+}
+
+define i32 @shl_select_not_folded_if_shift_amnt_is_nonconstant_2(i32 %x, i32 %a, i1 %cond) {
+; CHECK-LABEL: shl_select_not_folded_if_shift_amnt_is_nonconstant_2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    testb $1, %dl
+; CHECK-NEXT:    movl $3, %ecx
+; CHECK-NEXT:    cmovel %esi, %ecx
+; CHECK-NEXT:    # kill: def $cl killed $cl killed $ecx
+; CHECK-NEXT:    shrl %cl, %eax
+; CHECK-NEXT:    retq
+  %shift_amnt = select i1 %cond, i32 3, i32 %a
+  %ret = lshr i32 %x, %shift_amnt
+  ret i32 %ret
+}
+
+define i32 @shl_select_not_folded_if_shift_amnt_is_nonconstant_3(i32 %x, i32 %a, i32 %b, i1 %cond) {
+; CHECK-LABEL: shl_select_not_folded_if_shift_amnt_is_nonconstant_3:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    testb $1, %cl
+; CHECK-NEXT:    cmovel %edx, %esi
+; CHECK-NEXT:    movl %esi, %ecx
+; CHECK-NEXT:    shrl %cl, %eax
+; CHECK-NEXT:    retq
+  %shift_amnt = select i1 %cond, i32 %a, i32 %b
+  %ret = lshr i32 %x, %shift_amnt
+  ret i32 %ret
+}
diff --git a/llvm/test/CodeGen/X86/pr22338.ll b/llvm/test/CodeGen/X86/pr22338.ll
--- a/llvm/test/CodeGen/X86/pr22338.ll
+++ b/llvm/test/CodeGen/X86/pr22338.ll
@@ -5,51 +5,52 @@
 define i32 @fn(i32 %a0, i32 %a1) {
 ; X86-LABEL: fn:
 ; X86:       # %bb.0: # %entry
-; X86-NEXT:    pushl %ebx
+; X86-NEXT:    pushl %esi
 ; X86-NEXT:    .cfi_def_cfa_offset 8
-; X86-NEXT:    .cfi_offset %ebx, -8
-; X86-NEXT:    xorl %eax, %eax
-; X86-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    sete %cl
-; X86-NEXT:    setne %al
-; X86-NEXT:    cmpl $1, {{[0-9]+}}(%esp)
-; X86-NEXT:    sete %dl
-; X86-NEXT:    negl %eax
-; X86-NEXT:    addb %cl, %cl
-; X86-NEXT:    movl %eax, %ebx
-; X86-NEXT:    shll %cl, %ebx
-; X86-NEXT:    addb %dl, %dl
-; X86-NEXT:    movl %edx, %ecx
-; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    .cfi_offset %esi, -8
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    xorl %ecx, %ecx
+; X86-NEXT:    cmpl $1, %edx
+; X86-NEXT:    setne %cl
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    leal (,%ecx,4), %eax
+; X86-NEXT:    cmpl $1, %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    jne .LBB0_2
+; X86-NEXT:  # %bb.1: # %entry
+; X86-NEXT:    movl %eax, %edx
+; X86-NEXT:  .LBB0_2: # %entry
+; X86-NEXT:    cmpl $1, %esi
+; X86-NEXT:    je .LBB0_4
+; X86-NEXT:  # %bb.3: # %entry
+; X86-NEXT:    movl %ecx, %eax
 ; X86-NEXT:    .p2align 4, 0x90
-; X86-NEXT:  .LBB0_1: # %bb1
+; X86-NEXT:  .LBB0_4: # %bb1
 ; X86-NEXT:    # =>This Inner Loop Header: Depth=1
-; X86-NEXT:    testl %ebx, %ebx
-; X86-NEXT:    je .LBB0_1
-; X86-NEXT:  # %bb.2: # %bb2
-; X86-NEXT:    popl %ebx
+; X86-NEXT:    testl %edx, %edx
+; X86-NEXT:    je .LBB0_4
+; X86-NEXT:  # %bb.5: # %bb2
+; X86-NEXT:    popl %esi
 ; X86-NEXT:    .cfi_def_cfa_offset 4
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: fn:
 ; X64:       # %bb.0: # %entry
-; X64-NEXT:    xorl %eax, %eax
+; X64-NEXT:    xorl %edx, %edx
 ; X64-NEXT:    cmpl $1, %edi
-; X64-NEXT:    sete %cl
-; X64-NEXT:    setne %al
+; X64-NEXT:    setne %dl
+; X64-NEXT:    negl %edx
+; X64-NEXT:    leal (,%rdx,4), %eax
+; X64-NEXT:    cmpl $1, %edi
+; X64-NEXT:    movl %eax, %ecx
+; X64-NEXT:    cmovnel %edx, %ecx
 ; X64-NEXT:    cmpl $1, %esi
-; X64-NEXT:    sete %dl
-; X64-NEXT:    negl %eax
-; X64-NEXT:    addb %cl, %cl
-; X64-NEXT:    movl %eax, %esi
-; X64-NEXT:    shll %cl, %esi
-; X64-NEXT:    addb %dl, %dl
-; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    shll %cl, %eax
+; X64-NEXT:    cmovnel %edx, %eax
 ; X64-NEXT:    .p2align 4, 0x90
 ; X64-NEXT:  .LBB0_1: # %bb1
 ; X64-NEXT:    # =>This Inner Loop Header: Depth=1
-; X64-NEXT:    testl %esi, %esi
+; X64-NEXT:    testl %ecx, %ecx
 ; X64-NEXT:    je .LBB0_1
 ; X64-NEXT:  # %bb.2: # %bb2
 ; X64-NEXT:    retq
diff --git a/llvm/test/CodeGen/X86/select.ll b/llvm/test/CodeGen/X86/select.ll
--- a/llvm/test/CodeGen/X86/select.ll
+++ b/llvm/test/CodeGen/X86/select.ll
@@ -1094,40 +1094,33 @@
 }
 
 define i32 @trunc_select_miscompile(i32 %a, i1 zeroext %cc) {
-; GENERIC-LABEL: trunc_select_miscompile:
-; GENERIC:       ## %bb.0:
-; GENERIC-NEXT:    ## kill: def $esi killed $esi def $rsi
-; GENERIC-NEXT:    movl %edi, %eax
-; GENERIC-NEXT:    leal 2(%rsi), %ecx
-; GENERIC-NEXT:    ## kill: def $cl killed $cl killed $ecx
-; GENERIC-NEXT:    shll %cl, %eax
-; GENERIC-NEXT:    retq
-;
-; ATOM-LABEL: trunc_select_miscompile:
-; ATOM:       ## %bb.0:
-; ATOM-NEXT:    ## kill: def $esi killed $esi def $rsi
-; ATOM-NEXT:    leal 2(%rsi), %ecx
-; ATOM-NEXT:    movl %edi, %eax
-; ATOM-NEXT:    ## kill: def $cl killed $cl killed $ecx
-; ATOM-NEXT:    shll %cl, %eax
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    nop
-; ATOM-NEXT:    retq
+; CHECK-LABEL: trunc_select_miscompile:
+; CHECK:       ## %bb.0:
+; CHECK-NEXT:    ## kill: def $edi killed $edi def $rdi
+; CHECK-NEXT:    leal (,%rdi,8), %ecx
+; CHECK-NEXT:    leal (,%rdi,4), %eax
+; CHECK-NEXT:    testl %esi, %esi
+; CHECK-NEXT:    cmovnel %ecx, %eax
+; CHECK-NEXT:    retq
 ;
 ; ATHLON-LABEL: trunc_select_miscompile:
 ; ATHLON:       ## %bb.0:
 ; ATHLON-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; ATHLON-NEXT:    movb {{[0-9]+}}(%esp), %cl
-; ATHLON-NEXT:    orb $2, %cl
-; ATHLON-NEXT:    shll %cl, %eax
+; ATHLON-NEXT:    leal (,%eax,8), %ecx
+; ATHLON-NEXT:    shll $2, %eax
+; ATHLON-NEXT:    cmpb $0, {{[0-9]+}}(%esp)
+; ATHLON-NEXT:    cmovnel %ecx, %eax
 ; ATHLON-NEXT:    retl
 ;
 ; MCU-LABEL: trunc_select_miscompile:
 ; MCU:       # %bb.0:
-; MCU-NEXT:    movl %edx, %ecx
-; MCU-NEXT:    orb $2, %cl
-; MCU-NEXT:    # kill: def $cl killed $cl killed $ecx
-; MCU-NEXT:    shll %cl, %eax
+; MCU-NEXT:    testl %edx, %edx
+; MCU-NEXT:    jne .LBB20_1
+; MCU-NEXT:  # %bb.2:
+; MCU-NEXT:    shll $2, %eax
+; MCU-NEXT:    retl
+; MCU-NEXT:  .LBB20_1:
+; MCU-NEXT:    shll $3, %eax
 ; MCU-NEXT:    retl
   %tmp1 = select i1 %cc, i32 3, i32 2
   %tmp2 = shl i32 %a, %tmp1
diff --git a/llvm/test/CodeGen/X86/shift-parts.ll b/llvm/test/CodeGen/X86/shift-parts.ll
--- a/llvm/test/CodeGen/X86/shift-parts.ll
+++ b/llvm/test/CodeGen/X86/shift-parts.ll
@@ -10,17 +10,14 @@
 ; CHECK-LABEL: int87:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq g_144+{{.*}}(%rip), %rax
-; CHECK-NEXT:    movq g_144+{{.*}}(%rip), %rdx
-; CHECK-NEXT:    movzbl %sil, %ecx
-; CHECK-NEXT:    shll $6, %ecx
+; CHECK-NEXT:    movq g_144+{{.*}}(%rip), %rcx
 ; CHECK-NEXT:    .p2align 4, 0x90
 ; CHECK-NEXT:  .LBB0_1: # %for.cond
 ; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    movq %rdx, %rsi
-; CHECK-NEXT:    shrdq %cl, %rax, %rsi
-; CHECK-NEXT:    testb $64, %cl
-; CHECK-NEXT:    cmovneq %rax, %rsi
-; CHECK-NEXT:    orl $0, %esi
+; CHECK-NEXT:    testb $1, %sil
+; CHECK-NEXT:    movl %ecx, %edx
+; CHECK-NEXT:    cmovnel %eax, %edx
+; CHECK-NEXT:    testl %edx, %edx
 ; CHECK-NEXT:    je .LBB0_1
 ; CHECK-NEXT:  # %bb.2: # %if.then
 ; CHECK-NEXT:    movl $1, %eax