Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -545,6 +545,8 @@ unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); + SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL, + const APInt &DemandedBits); SDValue MatchLoadCombine(SDNode *N); SDValue MatchStoreCombine(StoreSDNode *N); SDValue ReduceLoadWidth(SDNode *N); @@ -6128,10 +6130,16 @@ return SDValue(); } +SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { + return MatchRotate( + LHS, RHS, DL, APInt::getAllOnesValue(LHS.getValueType().getSizeInBits())); +} + // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. -SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { +SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL, + const APInt &DemandedBits) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. 
EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) @@ -6203,21 +6211,41 @@ std::swap(LHSMask, RHSMask); } - unsigned EltSizeInBits = VT.getScalarSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); SDValue RHSShiftArg = RHSShift.getOperand(0); SDValue RHSShiftAmt = RHSShift.getOperand(1); + EVT RotVT = VT; + // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) - auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS, - ConstantSDNode *RHS) { - return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; + auto MatchRotateSum = [this, &DemandedBits, &RotVT](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + uint64_t RotAmount = + (LHS->getAPIntValue() + RHS->getAPIntValue()).getZExtValue(); + // For vectors, only allow exact match. + if (RotVT.isVector()) + return RotAmount == RotVT.getScalarSizeInBits(); + // For scalar, check that the type we use for rotation covers all demanded + // bits and is legal. + APInt RotMask = + APInt::getMaxValue(RotAmount).zextOrTrunc(DemandedBits.getBitWidth()); + if (!DemandedBits.isSubsetOf(RotMask)) + return false; + RotVT = EVT::getIntegerVT(*DAG.getContext(), RotAmount); + return TLI.isTypeLegal(RotVT); }; if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { - SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, - LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); + HasROTL = hasOperation(ISD::ROTL, RotVT); + HasROTR = hasOperation(ISD::ROTR, RotVT); + if (!HasROTL && !HasROTR) + return SDValue(); + + SDValue Rotated = DAG.getZExtOrTrunc(LHSShiftArg, DL, RotVT); + SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, RotVT, + Rotated, HasROTL ? LHSShiftAmt : RHSShiftAmt); + Rot = DAG.getAnyExtOrTrunc(Rot, DL, VT); // If there is an AND of either shifted operand, apply it to the result. 
if (LHSMask.getNode() || RHSMask.getNode()) { @@ -10685,11 +10713,20 @@ // because targets may prefer a wider type during later combines and invert // this transform. switch (N0.getOpcode()) { + case ISD::OR: { + // TODO: This would ideally be part of the SimplifyDemandedBits mechanism, but + // there is no way to easily plug it in at the moment, so it is limited to + // TRUNC. + SDLoc DL(N); + if (SDValue Rot = MatchRotate(N0.getOperand(0), N0.getOperand(1), DL, + APInt::getMaxValue(VT.getSizeInBits()))) + return DAG.getNode(ISD::TRUNCATE, DL, VT, Rot); + LLVM_FALLTHROUGH; + } case ISD::ADD: case ISD::SUB: case ISD::MUL: case ISD::AND: - case ISD::OR: case ISD::XOR: if (!LegalOperations && N0.hasOneUse() && (isConstantOrConstantVector(N0.getOperand(0), true) || Index: test/CodeGen/X86/rot16.ll =================================================================== --- test/CodeGen/X86/rot16.ll +++ test/CodeGen/X86/rot16.ll @@ -207,21 +207,14 @@ define i16 @rot16_trunc(i32 %x, i32 %y) nounwind { ; X32-LABEL: rot16_trunc: ; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $11, %ecx -; X32-NEXT: shll $5, %eax -; X32-NEXT: orl %ecx, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax +; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X32-NEXT: rolw $5, %ax ; X32-NEXT: retl ; ; X64-LABEL: rot16_trunc: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shrl $11, %ecx -; X64-NEXT: shll $5, %eax -; X64-NEXT: orl %ecx, %eax +; X64-NEXT: rolw $5, %ax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = lshr i32 %x, 11