Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -545,6 +545,8 @@ unsigned PosOpcode, unsigned NegOpcode, const SDLoc &DL); SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); + SDValue MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL, + const APInt &DemandedBits); SDValue MatchLoadCombine(SDNode *N); SDValue MatchStoreCombine(StoreSDNode *N); SDValue ReduceLoadWidth(SDNode *N); @@ -6128,10 +6130,16 @@ return SDValue(); } +SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { + return MatchRotate( + LHS, RHS, DL, APInt::getAllOnesValue(LHS.getValueType().getSizeInBits())); +} + // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. -SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { +SDValue DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL, + const APInt &DemandedBits) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. 
EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) @@ -6203,21 +6211,41 @@ std::swap(LHSMask, RHSMask); } - unsigned EltSizeInBits = VT.getScalarSizeInBits(); SDValue LHSShiftArg = LHSShift.getOperand(0); SDValue LHSShiftAmt = LHSShift.getOperand(1); SDValue RHSShiftArg = RHSShift.getOperand(0); SDValue RHSShiftAmt = RHSShift.getOperand(1); + EVT RotVT = VT; + // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) - auto MatchRotateSum = [EltSizeInBits](ConstantSDNode *LHS, - ConstantSDNode *RHS) { - return (LHS->getAPIntValue() + RHS->getAPIntValue()) == EltSizeInBits; + auto MatchRotateSum = [this, &DemandedBits, &RotVT](ConstantSDNode *LHS, + ConstantSDNode *RHS) { + uint64_t RotAmount = + (LHS->getAPIntValue() + RHS->getAPIntValue()).getZExtValue(); + // For vectors, only allow exact match. + if (RotVT.isVector()) + return RotAmount == RotVT.getScalarSizeInBits(); + // For scalar, check that the type we use for rotation covers all demanded + // bits and is legal. + APInt RotMask = + APInt::getMaxValue(RotAmount).zextOrTrunc(DemandedBits.getBitWidth()); + if (!DemandedBits.isSubsetOf(RotMask)) + return false; + RotVT = EVT::getIntegerVT(*DAG.getContext(), RotAmount); + return TLI.isTypeLegal(RotVT); }; if (ISD::matchBinaryPredicate(LHSShiftAmt, RHSShiftAmt, MatchRotateSum)) { - SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, - LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); + HasROTL = hasOperation(ISD::ROTL, RotVT); + HasROTR = hasOperation(ISD::ROTR, RotVT); + if (!HasROTL && !HasROTR) + return SDValue(); + + SDValue Rotated = DAG.getZExtOrTrunc(LHSShiftArg, DL, RotVT); + SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, RotVT, + Rotated, HasROTL ? LHSShiftAmt : RHSShiftAmt); + Rot = DAG.getAnyExtOrTrunc(Rot, DL, VT); // If there is an AND of either shifted operand, apply it to the result. 
if (LHSMask.getNode() || RHSMask.getNode()) { @@ -10685,11 +10713,20 @@ // because targets may prefer a wider type during later combines and invert // this transform. switch (N0.getOpcode()) { + case ISD::OR: { + // TODO: This would ideally be part of the SimplifyDemandedBits mechanism, but + // there is no way to easily plug it in at the moment, so it is limited to + // TRUNC. + SDLoc DL(N); + if (SDValue Rot = MatchRotate(N0.getOperand(0), N0.getOperand(1), DL, + APInt::getMaxValue(VT.getSizeInBits()))) + return DAG.getNode(ISD::TRUNCATE, DL, VT, Rot); + LLVM_FALLTHROUGH; + } case ISD::ADD: case ISD::SUB: case ISD::MUL: case ISD::AND: - case ISD::OR: case ISD::XOR: if (!LegalOperations && N0.hasOneUse() && (isConstantOrConstantVector(N0.getOperand(0), true) || Index: test/CodeGen/X86/rot16.ll =================================================================== --- test/CodeGen/X86/rot16.ll +++ test/CodeGen/X86/rot16.ll @@ -207,21 +207,14 @@ define i16 @rot16_trunc(i32 %x, i32 %y) nounwind { ; X32-LABEL: rot16_trunc: ; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-NEXT: movl %eax, %ecx -; X32-NEXT: shrl $11, %ecx -; X32-NEXT: shll $5, %eax -; X32-NEXT: orl %ecx, %eax -; X32-NEXT: # kill: def $ax killed $ax killed $eax +; X32-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X32-NEXT: rolw $5, %ax ; X32-NEXT: retl ; ; X64-LABEL: rot16_trunc: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: movl %edi, %ecx -; X64-NEXT: shrl $11, %ecx -; X64-NEXT: shll $5, %eax -; X64-NEXT: orl %ecx, %eax +; X64-NEXT: rolw $5, %ax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = lshr i32 %x, 11