Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -440,6 +440,7 @@ SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); + SDNode *ReassociateOrForRotate(SDValue Op0, SDValue Op1, const SDLoc &dl); SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, @@ -4864,6 +4865,11 @@ // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); + // If N0 or N1 are themselves ORs, there is still a potential for a rotate + // whose parts got reassociated with some other stuff. + if (N0.getOpcode() == ISD::OR || N1.getOpcode() == ISD::OR) + if (SDNode *Or = ReassociateOrForRotate(N0, N1, SDLoc(N))) + return SDValue(Or, 0); if (SDValue Load = MatchLoadCombine(N)) return Load; @@ -4875,6 +4881,90 @@ return SDValue(); } +SDNode *DAGCombiner::ReassociateOrForRotate(SDValue Op0, SDValue Op1, + const SDLoc &dl) { + EVT VT = Op0.getValueType(); + if (!hasOperation(ISD::ROTL, VT) && !hasOperation(ISD::ROTR, VT)) + return nullptr; + + // Expand all single-use ORs into a list (OredOps). + SmallVector WorkQ = { Op0, Op1 }; + SmallVector OredOps; + for (unsigned i = 0; i != WorkQ.size(); ++i) { + SDValue V = WorkQ[i]; + if (V.getOpcode() == ISD::OR && V.hasOneUse()) { + WorkQ.push_back(V.getOperand(0)); + WorkQ.push_back(V.getOperand(1)); + } else + OredOps.push_back(V); + } + + // Sort the ORed values with respect to the opcodes. This is to isolate + // the SHL operations into one contiguous block and same for SRL. + auto OpcOrder = [](SDValue A, SDValue B) { + // SHL < SRL < ... + unsigned OA = A.getOpcode(), OB = B.getOpcode(); + if (OA == ISD::SHL) + return true; + if (OB == ISD::SHL) + return false; + if (OA == ISD::SRL) + return true; + if (OB == ISD::SRL) + return false; + return OA < OB; + }; + std::stable_sort(OredOps.begin(), OredOps.end(), OpcOrder); + + // Find the number of SHLs and SRLs. + auto TmpL = find_if(OredOps, + [](SDValue V) { return V.getOpcode() != ISD::SHL; }); + unsigned LeftCount = std::distance(OredOps.begin(), TmpL); + auto TmpR = find_if(make_range(OredOps.begin()+LeftCount, OredOps.end()), + [](SDValue V) { return V.getOpcode() != ISD::SRL; }); + unsigned TotalCount = std::distance(OredOps.begin(), TmpR); + + // If there is at least one left and right shift, do pairwise matching. + if (LeftCount == 0 || LeftCount == TotalCount) + return nullptr; + + bool CreatedRotate = false; + for (unsigned L = 0; L != LeftCount; ++L) { + for (unsigned R = LeftCount; R != TotalCount; ++R) { + if (!OredOps[R]) + continue; + // If a rotate was generated, nullify original pieces and append + // the rotate to the list. + if (SDNode *T = MatchRotate(OredOps[L], OredOps[R], dl)) { + OredOps.push_back(SDValue(T, 0)); + OredOps[L] = OredOps[R] = SDValue(); + CreatedRotate = true; + break; + } + } + } + + // All pairs of left-right shifts have been examined. Now, re-package + // the values back into an OR tree. + if (!CreatedRotate) + return nullptr; + + auto OredEnd = remove_if(OredOps, [](SDValue V) { return !bool(V); }); + unsigned Size = OredEnd - OredOps.begin(); + while (Size != 1) { + for (unsigned i = 0; i != Size/2; ++i) + OredOps[i] = DAG.getNode(ISD::OR, dl, VT, OredOps[2*i], OredOps[2*i+1]); + if (Size % 2 != 0) { + OredOps[Size/2+1] = OredOps[Size-1]; + Size = Size/2 + 1; + } else + Size /= 2; + } + + // The last remaining op is the root. + return OredOps[0].getNode(); +} + /// Match "(X shl/srl V1) & V2" where V2 may not be present. bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { if (Op.getOpcode() == ISD::AND) { Index: test/CodeGen/Hexagon/rotate.ll =================================================================== --- test/CodeGen/Hexagon/rotate.ll +++ test/CodeGen/Hexagon/rotate.ll @@ -125,6 +125,17 @@ ret i32 %v3 } +; CHECK-LABEL: f11 +; CHECK: r0 |= rol(r1,#7) +define dso_local i32 @f11(i32 %a0, i32 %a1) local_unnamed_addr #0 { +b0: + %v0 = shl i32 %a1, 7 + %v1 = lshr i32 %a1, 25 + %v2 = or i32 %v1, %a0 + %v3 = or i32 %v2, %v0 + ret i32 %v3 +} + ; CHECK-LABEL: f12 ; CHECK: r0 ^= rol(r1,#7) define dso_local i32 @f12(i32 %a0, i32 %a1) local_unnamed_addr #0 { @@ -169,6 +180,17 @@ ret i64 %v3 } +; CHECK-LABEL: f16 +; CHECK: r1:0 |= rol(r3:2,#7) +define dso_local i64 @f16(i64 %a0, i64 %a1) local_unnamed_addr #0 { +b0: + %v0 = shl i64 %a1, 7 + %v1 = lshr i64 %a1, 57 + %v2 = or i64 %v1, %a0 + %v3 = or i64 %v2, %v0 + ret i64 %v3 +} + ; CHECK-LABEL: f17 ; CHECK: r1:0 ^= rol(r3:2,#7) define dso_local i64 @f17(i64 %a0, i64 %a1) local_unnamed_addr #0 {