Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -7127,14 +7127,41 @@ N2, APInt(N2.getScalarValueSizeInBits(), BitWidth - 1))) return IsFSHL ? N0 : N1; - // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth) if (ConstantSDNode *Cst = isConstOrConstSplat(N2)) { + EVT ShAmtTy = N2.getValueType(); + + // fold (fsh* N0, N1, c) -> (fsh* N0, N1, c % BitWidth) if (Cst->getAPIntValue().uge(BitWidth)) { uint64_t RotAmt = Cst->getAPIntValue().urem(BitWidth); return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0, N1, - DAG.getConstant(RotAmt, SDLoc(N), N2.getValueType())); + DAG.getConstant(RotAmt, SDLoc(N), ShAmtTy)); } - } + + unsigned ShAmt = Cst->getZExtValue(); + if (ShAmt == 0) + return IsFSHL ? N0 : N1; + + // fold fshl(undef, N1, C) -> lshr(N1, BW-C) + // fold fshr(undef, N1, C) -> lshr(N1, C) + // fold fshl(N0, undef, C) -> shl(N0, C) + // fold fshr(N0, undef, C) -> shl(N0, BW-C) + if (N0.isUndef()) + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, + DAG.getConstant(IsFSHL ? BitWidth - ShAmt : ShAmt, + SDLoc(N), ShAmtTy)); + if (N1.isUndef()) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, + DAG.getConstant(IsFSHL ? ShAmt : BitWidth - ShAmt, + SDLoc(N), ShAmtTy)); + } + + // fold fshr(undef, N1, N2) -> lshr(N1, N2) + // fold fshl(N0, undef, N2) -> shl(N0, N2) + // TODO: when is it worth doing SUB(BW, N2) as well? 
+ if (N0.isUndef() && !IsFSHL) + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N1, N2); + if (N1.isUndef() && IsFSHL) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, N2); // fold (fshl N0, N0, N2) -> (rotl N0, N2) // fold (fshr N0, N0, N2) -> (rotr N0, N2) Index: test/CodeGen/X86/funnel-shift.ll =================================================================== --- test/CodeGen/X86/funnel-shift.ll +++ test/CodeGen/X86/funnel-shift.ll @@ -382,12 +382,13 @@ ; X32-SSE2-LABEL: fshl_i32_undef0_cst: ; X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: shldl $9, %eax, %eax +; X32-SSE2-NEXT: shrl $23, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshl_i32_undef0_cst: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: shldl $9, %edi, %eax +; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: shrl $23, %eax ; X64-AVX2-NEXT: retq %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9) ret i32 %res @@ -398,7 +399,7 @@ ; X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: shldl %cl, %eax, %eax +; X32-SSE2-NEXT: shll %cl, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshl_i32_undef1: @@ -406,7 +407,7 @@ ; X64-AVX2-NEXT: movl %esi, %ecx ; X64-AVX2-NEXT: movl %edi, %eax ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-AVX2-NEXT: shldl %cl, %eax, %eax +; X64-AVX2-NEXT: shll %cl, %eax ; X64-AVX2-NEXT: retq %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %a1) ret i32 %res @@ -416,13 +417,13 @@ ; X32-SSE2-LABEL: fshl_i32_undef1_cst: ; X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: shldl $9, %eax, %eax +; X32-SSE2-NEXT: shll $9, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshl_i32_undef1_cst: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: movl %edi, %eax -; X64-AVX2-NEXT: shldl $9, %eax, %eax +; X64-AVX2-NEXT: shll $9, %eax ; X64-AVX2-NEXT: retq %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9) ret i32 %res @@ -433,7 +434,7 @@ ; 
X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movb {{[0-9]+}}(%esp), %cl ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: shrdl %cl, %eax, %eax +; X32-SSE2-NEXT: shrl %cl, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshr_i32_undef0: @@ -441,7 +442,7 @@ ; X64-AVX2-NEXT: movl %esi, %ecx ; X64-AVX2-NEXT: movl %edi, %eax ; X64-AVX2-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-AVX2-NEXT: shrdl %cl, %eax, %eax +; X64-AVX2-NEXT: shrl %cl, %eax ; X64-AVX2-NEXT: retq %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %a1) ret i32 %res @@ -451,13 +452,13 @@ ; X32-SSE2-LABEL: fshr_i32_undef0_cst: ; X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: shrdl $9, %eax, %eax +; X32-SSE2-NEXT: shrl $9, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshr_i32_undef0_cst: ; X64-AVX2: # %bb.0: ; X64-AVX2-NEXT: movl %edi, %eax -; X64-AVX2-NEXT: shrdl $9, %eax, %eax +; X64-AVX2-NEXT: shrl $9, %eax ; X64-AVX2-NEXT: retq %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9) ret i32 %res @@ -485,12 +486,13 @@ ; X32-SSE2-LABEL: fshr_i32_undef1_cst: ; X32-SSE2: # %bb.0: ; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-SSE2-NEXT: shrdl $9, %eax, %eax +; X32-SSE2-NEXT: shll $23, %eax ; X32-SSE2-NEXT: retl ; ; X64-AVX2-LABEL: fshr_i32_undef1_cst: ; X64-AVX2: # %bb.0: -; X64-AVX2-NEXT: shrdl $9, %edi, %eax +; X64-AVX2-NEXT: movl %edi, %eax +; X64-AVX2-NEXT: shll $23, %eax ; X64-AVX2-NEXT: retq %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9) ret i32 %res