Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -69,6 +69,7 @@
 #include <cstdint>
 #include <functional>
 #include <iterator>
+#include <queue>
 #include <string>
 #include <tuple>
 #include <utility>
@@ -459,6 +460,7 @@
     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                bool DemandHighBits = true);
     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
+    SDNode *ReassociateOrForRotate(SDValue Op0, SDValue Op1, const SDLoc &dl);
     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                               SDValue InnerPos, SDValue InnerNeg,
                               unsigned PosOpcode, unsigned NegOpcode,
@@ -5384,6 +5386,11 @@
   // See if this is some rotate idiom.
   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
     return SDValue(Rot, 0);
+  // If N0 or N1 are themselves ORs, there is still a potential for a rotate
+  // whose parts got reassociated with some other stuff.
+  if (N0.getOpcode() == ISD::OR || N1.getOpcode() == ISD::OR)
+    if (SDNode *Or = ReassociateOrForRotate(N0, N1, SDLoc(N)))
+      return SDValue(Or, 0);
 
   if (SDValue Load = MatchLoadCombine(N))
     return Load;
@@ -5404,6 +5411,94 @@
   return Op;
 }
 
+SDNode *DAGCombiner::ReassociateOrForRotate(SDValue Op0, SDValue Op1,
+                                            const SDLoc &dl) {
+  EVT VT = Op0.getValueType();
+  if (!hasOperation(ISD::ROTL, VT) && !hasOperation(ISD::ROTR, VT))
+    return nullptr;
+
+  // Expand all single-use ORs into a list (OredOps).
+  SmallVector<SDValue,8> OredOps;
+  std::queue<SDValue> WorkQ;
+  WorkQ.push(Op0);
+  WorkQ.push(Op1);
+
+  while (!WorkQ.empty()) {
+    SDValue V = WorkQ.front();
+    WorkQ.pop();
+    if (V.getOpcode() == ISD::OR && V.hasOneUse()) {
+      WorkQ.push(V.getOperand(0));
+      WorkQ.push(V.getOperand(1));
+    } else
+      OredOps.push_back(V);
+  }
+
+  // Since only shifts of the same SDValue can end up paired up into a rotate,
+  // create separate lists of shifts for each shifted value.
+  DenseMap<int,SmallVector<unsigned,8>> OpMap;
+  for (unsigned I = 0, E = OredOps.size(); I != E; ++I) {
+    SDValue V = OredOps[I];
+    unsigned Opc = V.getOpcode();
+    if (Opc == ISD::SHL || Opc == ISD::SRL)
+      OpMap[V.getOperand(0)->getNodeId()].push_back(I);
+  }
+
+  // Sort the shifts with respect to the opcodes. This is to group
+  // the SHL operations into one contiguous block and same for SRL.
+  auto OpcOrder = [&OredOps](unsigned I, unsigned J) {
+    return OredOps[I].getOpcode() < OredOps[J].getOpcode();
+  };
+
+  bool CreatedRotate = false;
+
+  for (auto P : OpMap) {
+    auto &Shifts = P.second;
+    assert(!Shifts.empty() && "OpMap should not have empty lists");
+    llvm::sort(Shifts.begin(), Shifts.end(), OpcOrder);
+    // The list of shifts should only have SHL and SRL on it grouped into
+    // two contiguous segments. Find the beginning of the second segment.
+    auto Boundary = std::upper_bound(std::next(Shifts.begin()), Shifts.end(),
+                                     Shifts.front(), OpcOrder);
+
+    for (unsigned I = 0, E = Boundary - Shifts.begin(); I != E; ++I) {
+      for (unsigned J = E, F = Shifts.size(); J != F; ++J) {
+        SDValue &OI = OredOps[Shifts[I]], &OJ = OredOps[Shifts[J]];
+        if (!OJ)
+          continue;
+        if (SDNode *T = MatchRotate(OI, OJ, dl)) {
+          OredOps.push_back(SDValue(T, 0));
+          OI = OJ = SDValue();
+          CreatedRotate = true;
+          // When a rotate is created, stop the inner loop traversal, but
+          // continue with the outer loop so that more opportunities for
+          // rotates of the same value could be found.
+          break;
+        }
+      }
+    }
+  }
+
+  // All pairs of left-right shifts have been examined. Now, re-package
+  // the values back into an OR tree.
+  if (!CreatedRotate)
+    return nullptr;
+
+  auto OredEnd = remove_if(OredOps, [](SDValue V) { return !bool(V); });
+  unsigned Size = OredEnd - OredOps.begin();
+  while (Size != 1) {
+    for (unsigned i = 0; i != Size/2; ++i)
+      OredOps[i] = DAG.getNode(ISD::OR, dl, VT, OredOps[2*i], OredOps[2*i+1]);
+    if (Size % 2 != 0) {
+      OredOps[Size/2] = OredOps[Size-1];
+      Size = Size/2 + 1;
+    } else
+      Size /= 2;
+  }
+
+  // The last remaining op is the root.
+  return OredOps[0].getNode();
+}
+
 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
 static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift,
                             SDValue &Mask) {
Index: test/CodeGen/Hexagon/rotate-multi.ll
===================================================================
--- /dev/null
+++ test/CodeGen/Hexagon/rotate-multi.ll
@@ -0,0 +1,80 @@
+; RUN: llc -march=hexagon < %s | FileCheck %s
+
+; OR of two rotates of %a0(r0).
+; CHECK-LABEL: f0:
+; CHECK: r[[R00:[0-9]+]] = rol(r0,#7)
+; CHECK: r[[R00]] |= rol(r0,#9)
+define i32 @f0(i32 %a0) #0 {
+b0:
+  %v0 = shl i32 %a0, 7
+  %v1 = lshr i32 %a0, 25
+  %v2 = or i32 %v0, %v1
+  %v3 = shl i32 %a0, 9
+  %v4 = lshr i32 %a0, 23
+  %v5 = or i32 %v3, %v4
+  %v6 = or i32 %v2, %v5
+  ret i32 %v6
+}
+
+; OR of two rotates of %a0(r0) with an extra input %a1(r1).
+; CHECK-LABEL: f1:
+; CHECK: r1 |= asl(r0,#7)
+; CHECK: r1 |= rol(r0,#9)
+define i32 @f1(i32 %a0, i32 %a1) #0 {
+b0:
+  %v0 = shl i32 %a0, 7
+  %v1 = lshr i32 %a0, 25
+  %v2 = or i32 %v0, %a1
+  %v3 = shl i32 %a0, 9
+  %v4 = lshr i32 %a0, 23
+  %v5 = or i32 %v3, %v4
+  %v6 = or i32 %v2, %v5
+  %v7 = or i32 %v6, %v1
+  ret i32 %v6
+}
+
+; OR of two rotates of two different inputs: %a0(r0) and %a1(r1).
+; CHECK-LABEL: f2:
+; CHECK: r0 = rol(r0,#11)
+; CHECK: r0 |= rol(r1,#19)
+define i32 @f2(i32 %a0, i32 %a1) #0 {
+  %v0 = shl i32 %a0, 11
+  %v1 = lshr i32 %a0, 21
+  %v2 = shl i32 %a1, 19
+  %v3 = lshr i32 %a1, 13
+  %v4 = or i32 %v0, %v2
+  %v5 = or i32 %v1, %v3
+  %v6 = or i32 %v4, %v5
+  ret i32 %v6
+}
+
+; ORs of multiple shifts of the same value with only one pair actually
+; matching a rotate.
+; CHECK-LABEL: f3:
+; CHECK-NOT: rol
+; CHECK: r[[R30:[0-9]+]] |= rol(r0,#7)
+; CHECK-NOT: rol
+define i32 @f3(i32 %a0) #0 {
+  %v0 = shl i32 %a0, 3
+  %v1 = shl i32 %a0, 5
+  %v2 = shl i32 %a0, 7      ; rotate
+  %v3 = shl i32 %a0, 13
+  %v4 = shl i32 %a0, 19
+  %v5 = lshr i32 %a0, 2
+  %v6 = lshr i32 %a0, 15
+  %v7 = lshr i32 %a0, 23
+  %v8 = lshr i32 %a0, 25    ; rotate
+  %v9 = lshr i32 %a0, 30
+  %v10 = or i32  %v0, %v1
+  %v11 = or i32 %v10, %v2
+  %v12 = or i32 %v11, %v3
+  %v13 = or i32 %v12, %v4
+  %v14 = or i32 %v13, %v5
+  %v15 = or i32 %v14, %v6
+  %v16 = or i32 %v15, %v7
+  %v17 = or i32 %v16, %v8
+  %v18 = or i32 %v17, %v9
+  ret i32 %v18
+}
+
+attributes #0 = { readnone nounwind "target-cpu"="hexagonv60" "target-features"="-packets" }
Index: test/CodeGen/Hexagon/rotate.ll
===================================================================
--- test/CodeGen/Hexagon/rotate.ll
+++ test/CodeGen/Hexagon/rotate.ll
@@ -134,6 +134,17 @@
   ret i32 %v3
 }
 
+; CHECK-LABEL: f11
+; CHECK: r0 |= rol(r1,#7)
+define i32 @f11(i32 %a0, i32 %a1) #0 {
+b0:
+  %v0 = shl i32 %a1, 7
+  %v1 = lshr i32 %a1, 25
+  %v2 = or i32 %v1, %a0
+  %v3 = or i32 %v2, %v0
+  ret i32 %v3
+}
+
 ; CHECK-LABEL: f12
 ; CHECK: r0 ^= rol(r1,#7)
 define i32 @f12(i32 %a0, i32 %a1) #0 {
@@ -178,6 +189,17 @@
   ret i64 %v3
 }
 
+; CHECK-LABEL: f16
+; CHECK: r1:0 |= rol(r3:2,#7)
+define i64 @f16(i64 %a0, i64 %a1) #0 {
+b0:
+  %v0 = shl i64 %a1, 7
+  %v1 = lshr i64 %a1, 57
+  %v2 = or i64 %v1, %a0
+  %v3 = or i64 %v2, %v0
+  ret i64 %v3
+}
+
 ; CHECK-LABEL: f17
 ; CHECK: r1:0 ^= rol(r3:2,#7)
 define i64 @f17(i64 %a0, i64 %a1) #0 {
Index: test/CodeGen/X86/rotate-multi.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/rotate-multi.ll
@@ -0,0 +1,122 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=x86_64-- < %s | FileCheck %s
+
+; OR of two rotates of %a0(edi).
+define i32 @f0(i32 %a0) #0 {
+; CHECK-LABEL: f0:
+; CHECK:       # %bb.0: # %b0
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    roll $9, %ecx
+; CHECK-NEXT:    roll $7, %eax
+; CHECK-NEXT:    orl %ecx, %eax
+; CHECK-NEXT:    retq
+b0:
+  %v0 = shl i32 %a0, 7
+  %v1 = lshr i32 %a0, 25
+  %v2 = or i32 %v0, %v1
+  %v3 = shl i32 %a0, 9
+  %v4 = lshr i32 %a0, 23
+  %v5 = or i32 %v3, %v4
+  %v6 = or i32 %v2, %v5
+  ret i32 %v6
+}
+
+; OR of two rotates of %a0(edi) with an extra input %a1(esi).
+define i32 @f1(i32 %a0, i32 %a1) #0 {
+; CHECK-LABEL: f1:
+; CHECK:       # %bb.0: # %b0
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    shll $7, %ecx
+; CHECK-NEXT:    roll $9, %eax
+; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    orl %ecx, %eax
+; CHECK-NEXT:    retq
+b0:
+  %v0 = shl i32 %a0, 7
+  %v1 = lshr i32 %a0, 25
+  %v2 = or i32 %v0, %a1
+  %v3 = shl i32 %a0, 9
+  %v4 = lshr i32 %a0, 23
+  %v5 = or i32 %v3, %v4
+  %v6 = or i32 %v2, %v5
+  %v7 = or i32 %v6, %v1
+  ret i32 %v6
+}
+
+; OR of two rotates of two different inputs: %a0(edi) and %a1(esi).
+define i32 @f2(i32 %a0, i32 %a1) #0 {
+; CHECK-LABEL: f2:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    roll $19, %esi
+; CHECK-NEXT:    roll $11, %eax
+; CHECK-NEXT:    orl %esi, %eax
+; CHECK-NEXT:    retq
+  %v0 = shl i32 %a0, 11
+  %v1 = lshr i32 %a0, 21
+  %v2 = shl i32 %a1, 19
+  %v3 = lshr i32 %a1, 13
+  %v4 = or i32 %v0, %v2
+  %v5 = or i32 %v1, %v3
+  %v6 = or i32 %v4, %v5
+  ret i32 %v6
+}
+
+; ORs of multiple shifts of the same value with only one pair actually
+; matching a rotate.
+define i32 @f3(i32 %a0) #0 {
+; CHECK-LABEL: f3:
+; CHECK:       # %bb.0: # %b0
+; CHECK-NEXT:    movl %edi, %eax
+; CHECK-NEXT:    leal (,%rax,8), %ecx
+; CHECK-NEXT:    movl %edi, %edx
+; CHECK-NEXT:    shll $5, %edx
+; CHECK-NEXT:    orl %ecx, %edx
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    shll $13, %ecx
+; CHECK-NEXT:    orl %edx, %ecx
+; CHECK-NEXT:    movl %edi, %edx
+; CHECK-NEXT:    shll $19, %edx
+; CHECK-NEXT:    orl %ecx, %edx
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    shrl $2, %ecx
+; CHECK-NEXT:    movl %edi, %esi
+; CHECK-NEXT:    shrl $15, %esi
+; CHECK-NEXT:    orl %ecx, %esi
+; CHECK-NEXT:    movl %edi, %ecx
+; CHECK-NEXT:    shrl $23, %ecx
+; CHECK-NEXT:    orl %esi, %ecx
+; CHECK-NEXT:    orl %edx, %ecx
+; CHECK-NEXT:    movl %edi, %edx
+; CHECK-NEXT:    shrl $30, %edx
+; CHECK-NEXT:    roll $7, %eax
+; CHECK-NEXT:    orl %ecx, %eax
+; CHECK-NEXT:    orl %edx, %eax
+; CHECK-NEXT:    # kill: def $eax killed $eax killed $rax
+; CHECK-NEXT:    retq
+b0:
+  %v0 = shl i32 %a0, 3
+  %v1 = shl i32 %a0, 5
+  %v2 = shl i32 %a0, 7      ; rotate
+  %v3 = shl i32 %a0, 13
+  %v4 = shl i32 %a0, 19
+  %v5 = lshr i32 %a0, 2
+  %v6 = lshr i32 %a0, 15
+  %v7 = lshr i32 %a0, 23
+  %v8 = lshr i32 %a0, 25    ; rotate
+  %v9 = lshr i32 %a0, 30
+  %v10 = or i32  %v0, %v1
+  %v11 = or i32 %v10, %v2
+  %v12 = or i32 %v11, %v3
+  %v13 = or i32 %v12, %v4
+  %v14 = or i32 %v13, %v5
+  %v15 = or i32 %v14, %v6
+  %v16 = or i32 %v15, %v7
+  %v17 = or i32 %v16, %v8
+  %v18 = or i32 %v17, %v9
+  ret i32 %v18
+}
+
+attributes #0 = { readnone nounwind }