Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp =================================================================== --- lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -4823,7 +4823,8 @@ // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate // in direction shift1 by Neg. The range [0, EltSize) means that we only need // to consider shift amounts with defined behavior. -static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { +static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize, + SelectionDAG &DAG) { // If EltSize is a power of 2 then: // // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) @@ -4858,9 +4859,13 @@ unsigned MaskLoBits = 0; if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { - if (NegC->getAPIntValue() == EltSize - 1) { + KnownBits Known; + DAG.computeKnownBits(Neg.getOperand(0), Known); + unsigned Bits = Log2_64(EltSize); + if (NegC->getAPIntValue().getActiveBits() <= Bits && + ((NegC->getAPIntValue() | Known.Zero).countTrailingOnes() >= Bits)) { Neg = Neg.getOperand(0); - MaskLoBits = Log2_64(EltSize); + MaskLoBits = Bits; } } } @@ -4875,10 +4880,16 @@ // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with // Pos'. The truncation is redundant for the purpose of the equality. - if (MaskLoBits && Pos.getOpcode() == ISD::AND) - if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) - if (PosC->getAPIntValue() == EltSize - 1) + if (MaskLoBits && Pos.getOpcode() == ISD::AND) { + if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) { + KnownBits Known; + DAG.computeKnownBits(Pos.getOperand(0), Known); + if (PosC->getAPIntValue().getActiveBits() <= MaskLoBits && + ((PosC->getAPIntValue() | Known.Zero).countTrailingOnes() >= + MaskLoBits)) Pos = Pos.getOperand(0); + } + } // The condition we need is now: // @@ -4934,7 +4945,7 @@ // (srl x, (*ext y))) -> // (rotr x, y) or (rotl x, (sub 32, y)) EVT VT = Shifted.getValueType(); - if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) { + if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits(), DAG)) { bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, HasPos ? Pos : Neg).getNode(); Index: test/CodeGen/X86/rotate4.ll =================================================================== --- test/CodeGen/X86/rotate4.ll +++ test/CodeGen/X86/rotate4.ll @@ -284,15 +284,9 @@ define i32 @rotate_demanded_bits(i32, i32) { ; CHECK-LABEL: rotate_demanded_bits: ; CHECK: # %bb.0: +; CHECK-NEXT: andb $30, %sil ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: andl $30, %ecx -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shll %cl, %eax -; CHECK-NEXT: negl %ecx -; CHECK-NEXT: andb $30, %cl -; CHECK-NEXT: # kill: def $cl killed $cl killed $ecx -; CHECK-NEXT: shrl %cl, %edi -; CHECK-NEXT: orl %eax, %edi +; CHECK-NEXT: roll %cl, %edi ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq %3 = and i32 %1, 30 @@ -324,16 +318,10 @@ define i32 @rotate_demanded_bits_3(i32, i32) { ; CHECK-LABEL: rotate_demanded_bits_3: ; CHECK: # %bb.0: -; CHECK-NEXT: addl %esi, %esi -; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: andb $30, %cl -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: shll %cl, %eax -; CHECK-NEXT: negl %esi +; CHECK-NEXT: addb %sil, %sil ; CHECK-NEXT: andb $30, %sil ; CHECK-NEXT: movl %esi, %ecx -; CHECK-NEXT: shrl %cl, %edi -; CHECK-NEXT: orl %eax, %edi +; CHECK-NEXT: roll %cl, %edi ; CHECK-NEXT: movl %edi, %eax ; CHECK-NEXT: retq %3 = shl i32 %1, 1