diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -6630,8 +6630,11 @@
 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
 // in direction shift1 by Neg. The range [0, EltSize) means that we only need
 // to consider shift amounts with defined behavior.
+//
+// The IsRotate flag should be set when the LHS of both shifts is the same.
+// Otherwise, if matching a general funnel shift, it should be clear.
 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize,
-                           SelectionDAG &DAG) {
+                           SelectionDAG &DAG, bool IsRotate) {
   // If EltSize is a power of 2 then:
   //
   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
@@ -6663,8 +6666,11 @@
   // always invokes undefined behavior for 32-bit X.
   //
   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
+  //
+  // NOTE: We can only do this when matching a rotate and not a general
+  // funnel shift.
   unsigned MaskLoBits = 0;
-  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
+  if (IsRotate && Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
       KnownBits Known = DAG.computeKnownBits(Neg.getOperand(0));
       unsigned Bits = Log2_64(EltSize);
@@ -6754,7 +6760,8 @@
   //          (srl x, (*ext y))) ->
   //   (rotr x, y) or (rotl x, (sub 32, y))
   EVT VT = Shifted.getValueType();
-  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) {
+  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG,
+                     /*IsRotate*/ true)) {
     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                        HasPos ? Pos : Neg);
@@ -6783,7 +6790,7 @@
   // fold (or (shl x0, (*ext (sub 32, y))),
   //          (srl x1, (*ext y))) ->
   //   (fshr x0, x1, y) or (fshl x0, x1, (sub 32, y))
-  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG)) {
+  if (matchRotateSub(InnerPos, InnerNeg, EltBits, DAG, /*IsRotate*/ N0 == N1)) {
     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, N0, N1,
                        HasPos ? Pos : Neg);
diff --git a/llvm/test/CodeGen/X86/shift-double.ll b/llvm/test/CodeGen/X86/shift-double.ll
--- a/llvm/test/CodeGen/X86/shift-double.ll
+++ b/llvm/test/CodeGen/X86/shift-double.ll
@@ -480,23 +480,31 @@
   ret i32 %sh
 }
 
-; PR34641 - Masked Shift Counts
+; These are not valid shld/shrd patterns. When the shift amount modulo
+; the bitwidth is zero, the result should be an OR of both operands, not a
+; shift.
 
-define i32 @shld_safe_i32(i32, i32, i32) {
-; X86-LABEL: shld_safe_i32:
+define i32 @not_shld_i32(i32, i32, i32) {
+; X86-LABEL: not_shld_i32:
 ; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shldl %cl, %edx, %eax
+; X86-NEXT:    shll %cl, %edx
+; X86-NEXT:    negb %cl
+; X86-NEXT:    shrl %cl, %eax
+; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: shld_safe_i32:
+; X64-LABEL: not_shld_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    shll %cl, %edi
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shldl %cl, %esi, %eax
+; X64-NEXT:    shrl %cl, %eax
+; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    retq
   %4 = and i32 %2, 31
   %5 = shl i32 %0, %4
@@ -507,21 +515,27 @@
   ret i32 %9
 }
 
-define i32 @shrd_safe_i32(i32, i32, i32) {
-; X86-LABEL: shrd_safe_i32:
+define i32 @not_shrd_i32(i32, i32, i32) {
+; X86-LABEL: not_shrd_i32:
 ; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    shrdl %cl, %edx, %eax
+; X86-NEXT:    shrl %cl, %edx
+; X86-NEXT:    negb %cl
+; X86-NEXT:    shll %cl, %eax
+; X86-NEXT:    orl %edx, %eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: shrd_safe_i32:
+; X64-LABEL: not_shrd_i32:
 ; X64:       # %bb.0:
 ; X64-NEXT:    movl %edx, %ecx
-; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    shrl %cl, %edi
+; X64-NEXT:    negb %cl
 ; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-NEXT:    shrdl %cl, %esi, %eax
+; X64-NEXT:    shll %cl, %eax
+; X64-NEXT:    orl %edi, %eax
 ; X64-NEXT:    retq
   %4 = and i32 %2, 31
   %5 = lshr i32 %0, %4
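
For context, and not part of the patch itself: a minimal IR sketch of why the OR-of-masked-shifts pattern is only a funnel shift when both shifted operands are the same value. The function name below is hypothetical. With %y == 0 both masked shift amounts are 0, so the IR computes %x0 | %x1, whereas @llvm.fshl.i32(%x0, %x1, 0) (and x86 shld with a count of 0) returns %x0 unchanged. The two agree only when %x0 == %x1, i.e. for a rotate, which is exactly what the new IsRotate flag distinguishes.

define i32 @or_of_masked_shifts(i32 %x0, i32 %x1, i32 %y) {
  %amt = and i32 %y, 31            ; shift amount modulo 32
  %hi = shl i32 %x0, %amt
  %neg = sub i32 0, %y
  %namt = and i32 %neg, 31         ; (32 - %y) modulo 32
  %lo = lshr i32 %x1, %namt
  %r = or i32 %hi, %lo             ; %y == 0 yields %x0 | %x1, not %x0
  ret i32 %r
}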