Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -69,6 +69,7 @@ #include #include #include +#include #include #include #include @@ -460,6 +461,7 @@ SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); + SDNode *ReassociateOrForRotate(SDValue Op0, SDValue Op1, const SDLoc &dl); SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, SDValue InnerPos, SDValue InnerNeg, unsigned PosOpcode, unsigned NegOpcode, @@ -5468,6 +5470,11 @@ // See if this is some rotate idiom. if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); + // If N0 or N1 are themselves ORs, there is still a potential for a rotate + // whose parts got reassociated with some other stuff. + if (N0.getOpcode() == ISD::OR || N1.getOpcode() == ISD::OR) + if (SDNode *Or = ReassociateOrForRotate(N0, N1, SDLoc(N))) + return SDValue(Or, 0); if (SDValue Load = MatchLoadCombine(N)) return Load; @@ -5488,6 +5495,94 @@ return Op; } +SDNode *DAGCombiner::ReassociateOrForRotate(SDValue Op0, SDValue Op1, + const SDLoc &dl) { + EVT VT = Op0.getValueType(); + if (!hasOperation(ISD::ROTL, VT) && !hasOperation(ISD::ROTR, VT)) + return nullptr; + + // Expand all single-use ORs into a list (OredOps). + SmallVector OredOps; + std::queue WorkQ; + WorkQ.push(Op0); + WorkQ.push(Op1); + + while (!WorkQ.empty()) { + SDValue V = WorkQ.front(); + WorkQ.pop(); + if (V.getOpcode() == ISD::OR && V.hasOneUse()) { + WorkQ.push(V.getOperand(0)); + WorkQ.push(V.getOperand(1)); + } else + OredOps.push_back(V); + } + + // Since only shifts of the same SDValue can end up paired up into a rotate, + // create separate lists of shifts for each shifted value. + DenseMap> OpMap; + for (unsigned I = 0, E = OredOps.size(); I != E; ++I) { + SDValue V = OredOps[I]; + unsigned Opc = V.getOpcode(); + if (Opc == ISD::SHL || Opc == ISD::SRL) + OpMap[V.getOperand(0)->getNodeId()].push_back(I); + } + + // Sort the shifts with respect to the opcodes. This is to group + // the SHL operations into one contiguous block and same for SRL. + auto OpcOrder = [&OredOps](unsigned I, unsigned J) { + return OredOps[I].getOpcode() < OredOps[J].getOpcode(); + }; + + bool CreatedRotate = false; + + for (auto P : OpMap) { + auto &Shifts = P.second; + assert(!Shifts.empty() && "OpMap should not have empty lists"); + llvm::sort(Shifts.begin(), Shifts.end(), OpcOrder); + // The list of shifts should only have SHL and SRL on it grouped into + // two contiguous segments. Find the beginning of the second segment. + auto Boundary = std::upper_bound(std::next(Shifts.begin()), Shifts.end(), + Shifts.front(), OpcOrder); + + for (unsigned I = 0, E = Boundary - Shifts.begin(); I != E; ++I) { + for (unsigned J = E, F = Shifts.size(); J != F; ++J) { + SDValue &OI = OredOps[Shifts[I]], &OJ = OredOps[Shifts[J]]; + if (!OJ) + continue; + if (SDNode *T = MatchRotate(OI, OJ, dl)) { + OredOps.push_back(SDValue(T, 0)); + OI = OJ = SDValue(); + CreatedRotate = true; + // When a rotate is created, stop the inner loop traversal, but + // continue with the outer loop so that more opportunities for + // rotates of the same value could be found. + break; + } + } + } + } + + // All pairs of left-right shifts have been examined. Now, re-package + // the values back into an OR tree. + if (!CreatedRotate) + return nullptr; + + auto OredEnd = remove_if(OredOps, [](SDValue V) { return !bool(V); }); + unsigned Size = OredEnd - OredOps.begin(); + while (Size != 1) { + for (unsigned i = 0; i != Size/2; ++i) + OredOps[i] = DAG.getNode(ISD::OR, dl, VT, OredOps[2*i], OredOps[2*i+1]); + if (Size % 2 != 0) { + OredOps[Size/2] = OredOps[Size-1]; + Size = Size/2 + 1; + } else + Size /= 2; + } + + // The last remaining op is the root. + return OredOps[0].getNode(); +} + /// Match "(X shl/srl V1) & V2" where V2 may not be present. static bool matchRotateHalf(SelectionDAG &DAG, SDValue Op, SDValue &Shift, SDValue &Mask) { Index: test/CodeGen/Hexagon/rotate-multi.ll =================================================================== --- test/CodeGen/Hexagon/rotate-multi.ll +++ test/CodeGen/Hexagon/rotate-multi.ll @@ -35,11 +35,8 @@ ; OR of two rotates of two different inputs: %a0(r0) and %a1(r1). ; CHECK-LABEL: f2: -; CHECK: r[[R20:[0-9]+]] = asl(r0,#11) -; CHECK: r[[R21:[0-9]+]] = lsr(r0,#21) -; CHECK: r[[R22:[0-9]+]] = lsr(r1,#13) -; CHECK: r[[R20]] |= asl(r1,#19) -; CHECK: r[[R20]] |= or(r[[R21]],r[[R22]]) +; CHECK: r0 = rol(r0,#11) +; CHECK: r0 |= rol(r1,#19) define i32 @f2(i32 %a0, i32 %a1) #0 { %v0 = shl i32 %a0, 11 %v1 = lshr i32 %a0, 21 @@ -54,16 +51,9 @@ ; ORs of multiple shifts of the same value with only one pair actually ; matching a rotate. ; CHECK-LABEL: f3: -; CHECK: r[[R30:[0-9]+]] = asl(r0,#3) -; CHECK: r[[R30]] |= asl(r0,#5) -; CHECK: r[[R30]] |= asl(r0,#7) -; CHECK: r[[R30]] |= asl(r0,#13) -; CHECK: r[[R30]] |= asl(r0,#19) -; CHECK: r[[R30]] |= lsr(r0,#2) -; CHECK: r[[R30]] |= lsr(r0,#15) -; CHECK: r[[R30]] |= lsr(r0,#23) -; CHECK: r[[R30]] |= lsr(r0,#25) -; CHECK: r[[R30]] |= lsr(r0,#30) +; CHECK-NOT: rol +; CHECK: r[[R30:[0-9]+]] |= rol(r0,#7) +; CHECK-NOT: rol define i32 @f3(i32 %a0) #0 { %v0 = shl i32 %a0, 3 %v1 = shl i32 %a0, 5 Index: test/CodeGen/Hexagon/rotate.ll =================================================================== --- test/CodeGen/Hexagon/rotate.ll +++ test/CodeGen/Hexagon/rotate.ll @@ -135,8 +135,7 @@ } ; CHECK-LABEL: f11 -; CHECK: r0 |= lsr(r1,#25) -; CHECK: r0 |= asl(r1,#7) +; CHECK: r0 |= rol(r1,#7) define i32 @f11(i32 %a0, i32 %a1) #0 { b0: %v0 = shl i32 %a1, 7 @@ -191,8 +190,7 @@ } ; CHECK-LABEL: f16 -; CHECK: r1:0 |= lsr(r3:2,#57) -; CHECK: r1:0 |= asl(r3:2,#7) +; CHECK: r1:0 |= rol(r3:2,#7) define i64 @f16(i64 %a0, i64 %a1) #0 { b0: %v0 = shl i64 %a1, 7 Index: test/CodeGen/X86/rotate-multi.ll =================================================================== --- test/CodeGen/X86/rotate-multi.ll +++ test/CodeGen/X86/rotate-multi.ll @@ -7,8 +7,8 @@ ; CHECK: # %bb.0: # %b0 ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: roll $7, %ecx -; CHECK-NEXT: roll $9, %eax +; CHECK-NEXT: roll $9, %ecx +; CHECK-NEXT: roll $7, %eax ; CHECK-NEXT: orl %ecx, %eax ; CHECK-NEXT: retq b0: @@ -49,16 +49,10 @@ define i32 @f2(i32 %a0, i32 %a1) #0 { ; CHECK-LABEL: f2: ; CHECK: # %bb.0: -; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: shll $11, %ecx -; CHECK-NEXT: shrl $21, %edi -; CHECK-NEXT: movl %esi, %edx -; CHECK-NEXT: shll $19, %edx -; CHECK-NEXT: shrl $13, %eax -; CHECK-NEXT: orl %edi, %eax -; CHECK-NEXT: orl %edx, %eax -; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: roll $19, %esi +; CHECK-NEXT: roll $11, %eax +; CHECK-NEXT: orl %esi, %eax ; CHECK-NEXT: retq %v0 = shl i32 %a0, 11 %v1 = lshr i32 %a0, 21 @@ -75,12 +69,10 @@ define i32 @f3(i32 %a0) #0 { ; CHECK-LABEL: f3: ; CHECK: # %bb.0: # %b0 -; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal (,%rdi,8), %eax -; CHECK-NEXT: movl %edi, %ecx -; CHECK-NEXT: shll $5, %ecx +; CHECK-NEXT: movl %edi, %eax +; CHECK-NEXT: leal (,%rax,8), %ecx ; CHECK-NEXT: movl %edi, %edx -; CHECK-NEXT: shll $7, %edx +; CHECK-NEXT: shll $5, %edx ; CHECK-NEXT: orl %ecx, %edx ; CHECK-NEXT: movl %edi, %ecx ; CHECK-NEXT: shll $13, %ecx @@ -90,19 +82,19 @@ ; CHECK-NEXT: orl %ecx, %edx ; CHECK-NEXT: movl %edi, %ecx ; CHECK-NEXT: shrl $2, %ecx -; CHECK-NEXT: orl %edx, %ecx -; CHECK-NEXT: movl %edi, %edx -; CHECK-NEXT: shrl $15, %edx -; CHECK-NEXT: orl %ecx, %edx +; CHECK-NEXT: movl %edi, %esi +; CHECK-NEXT: shrl $15, %esi +; CHECK-NEXT: orl %ecx, %esi ; CHECK-NEXT: movl %edi, %ecx ; CHECK-NEXT: shrl $23, %ecx +; CHECK-NEXT: orl %esi, %ecx ; CHECK-NEXT: orl %edx, %ecx ; CHECK-NEXT: movl %edi, %edx -; CHECK-NEXT: shrl $25, %edx -; CHECK-NEXT: orl %ecx, %edx -; CHECK-NEXT: shrl $30, %edi -; CHECK-NEXT: orl %edx, %edi -; CHECK-NEXT: orl %edi, %eax +; CHECK-NEXT: shrl $30, %edx +; CHECK-NEXT: roll $7, %eax +; CHECK-NEXT: orl %ecx, %eax +; CHECK-NEXT: orl %edx, %eax +; CHECK-NEXT: # kill: def $eax killed $eax killed $rax ; CHECK-NEXT: retq b0: %v0 = shl i32 %a0, 3