Index: lib/Target/X86/X86ISelDAGToDAG.cpp =================================================================== --- lib/Target/X86/X86ISelDAGToDAG.cpp +++ lib/Target/X86/X86ISelDAGToDAG.cpp @@ -456,6 +456,7 @@ bool matchBEXTRFromAnd(SDNode *Node); bool shrinkAndImmediate(SDNode *N); bool isMaskZeroExtended(SDNode *N) const; + bool tryShiftAmountMod(SDNode *N); MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad, const SDLoc &dl, MVT VT, SDNode *Node); @@ -1105,10 +1106,10 @@ // the Pos node's ID. Note that this does *not* preserve the uniqueness of node // IDs! The selection DAG must no longer depend on their uniqueness when this // is used. -static void insertDAGNode(SelectionDAG &DAG, SDValue Pos, SDValue N) { +static void insertDAGNode(SelectionDAG &DAG, SDNode *Pos, SDValue N) { if (N->getNodeId() == -1 || (SelectionDAGISel::getUninvalidatedNodeId(N.getNode()) > - SelectionDAGISel::getUninvalidatedNodeId(Pos.getNode()))) { + SelectionDAGISel::getUninvalidatedNodeId(Pos))) { DAG.RepositionNode(Pos->getIterator(), N.getNode()); // Mark Node as invalid for pruning as after this it may be a successor to a // selected node but otherwise be in the same position of Pos. @@ -1151,12 +1152,12 @@ // these nodes. We continually insert before 'N' in sequence as this is // essentially a pre-flattened and pre-sorted sequence of nodes. There is no // hierarchy left to express. - insertDAGNode(DAG, N, Eight); - insertDAGNode(DAG, N, Srl); - insertDAGNode(DAG, N, NewMask); - insertDAGNode(DAG, N, And); - insertDAGNode(DAG, N, ShlCount); - insertDAGNode(DAG, N, Shl); + insertDAGNode(DAG, N.getNode(), Eight); + insertDAGNode(DAG, N.getNode(), Srl); + insertDAGNode(DAG, N.getNode(), NewMask); + insertDAGNode(DAG, N.getNode(), And); + insertDAGNode(DAG, N.getNode(), ShlCount); + insertDAGNode(DAG, N.getNode(), Shl); DAG.ReplaceAllUsesWith(N, Shl); AM.IndexReg = And; AM.Scale = (1 << ScaleLog); @@ -1196,9 +1197,9 @@ // these nodes. 
We continually insert before 'N' in sequence as this is // essentially a pre-flattened and pre-sorted sequence of nodes. There is no // hierarchy left to express. - insertDAGNode(DAG, N, NewMask); - insertDAGNode(DAG, N, NewAnd); - insertDAGNode(DAG, N, NewShift); + insertDAGNode(DAG, N.getNode(), NewMask); + insertDAGNode(DAG, N.getNode(), NewAnd); + insertDAGNode(DAG, N.getNode(), NewShift); DAG.ReplaceAllUsesWith(N, NewShift); AM.Scale = 1 << ShiftAmt; @@ -1292,7 +1293,7 @@ assert(X.getValueType() != VT); // We looked through an ANY_EXTEND node, insert a ZERO_EXTEND. SDValue NewX = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(X), VT, X); - insertDAGNode(DAG, N, NewX); + insertDAGNode(DAG, N.getNode(), NewX); X = NewX; } SDLoc DL(N); @@ -1306,10 +1307,10 @@ // these nodes. We continually insert before 'N' in sequence as this is // essentially a pre-flattened and pre-sorted sequence of nodes. There is no // hierarchy left to express. - insertDAGNode(DAG, N, NewSRLAmt); - insertDAGNode(DAG, N, NewSRL); - insertDAGNode(DAG, N, NewSHLAmt); - insertDAGNode(DAG, N, NewSHL); + insertDAGNode(DAG, N.getNode(), NewSRLAmt); + insertDAGNode(DAG, N.getNode(), NewSRL); + insertDAGNode(DAG, N.getNode(), NewSHLAmt); + insertDAGNode(DAG, N.getNode(), NewSHL); DAG.ReplaceAllUsesWith(N, NewSHL); AM.Scale = 1 << AMShiftAmt; @@ -1544,8 +1545,8 @@ AM.Scale = 1; // Insert the new nodes into the topological ordering. - insertDAGNode(*CurDAG, Handle.getValue(), Zero); - insertDAGNode(*CurDAG, Handle.getValue(), Neg); + insertDAGNode(*CurDAG, Handle.getValue().getNode(), Zero); + insertDAGNode(*CurDAG, Handle.getValue().getNode(), Neg); return false; } @@ -2677,6 +2678,96 @@ return CNode; } +bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) { + EVT VT = N->getValueType(0); + + // Only handle scalar shifts. + if (VT.isVector()) + return false; + + // Narrower shifts only mask to 5 bits in hardware. + unsigned Size = VT == MVT::i64 ? 
64 : 32; + + SDValue ShiftAmt = N->getOperand(1); + SDLoc DL(N); + + // Skip over a truncate of the shift amount. + if (ShiftAmt->getOpcode() == ISD::TRUNCATE) + ShiftAmt = ShiftAmt->getOperand(0); + + // Special case to avoid messing up a BZHI pattern. + // Look for (srl (shl X, (size - y)), (size - y)) + if (Subtarget->hasBMI2() && (VT == MVT::i32 || VT == MVT::i64) && + N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL && + // Shift amounts the same? + N->getOperand(1) == N->getOperand(0).getOperand(1) && + // Shift amounts size - y? + ShiftAmt.getOpcode() == ISD::SUB && + isa<ConstantSDNode>(ShiftAmt.getOperand(0)) && + cast<ConstantSDNode>(ShiftAmt.getOperand(0))->getZExtValue() == Size) + return false; + + SDValue NewShiftAmt; + if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) { + SDValue Add0 = ShiftAmt->getOperand(0); + SDValue Add1 = ShiftAmt->getOperand(1); + // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X + // to avoid the ADD/SUB. + if (isa<ConstantSDNode>(Add1) && + cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) { + NewShiftAmt = Add0; + // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to + // generate a NEG instead of a SUB of a constant. + } else if (ShiftAmt->getOpcode() == ISD::SUB && + isa<ConstantSDNode>(Add0) && + cast<ConstantSDNode>(Add0)->getZExtValue() != 0 && + cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) { + // Insert a negate op. + // TODO: This isn't guaranteed to replace the sub if there is a logic cone + // that uses it that's not a shift. + EVT SubVT = ShiftAmt.getValueType(); + SDValue Zero = CurDAG->getConstant(0, DL, SubVT); + SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1); + NewShiftAmt = Neg; + + // Insert these operands into a valid topological order so they can + // get selected independently. + insertDAGNode(*CurDAG, N, Zero); + insertDAGNode(*CurDAG, N, Neg); + } else + return false; + } else + return false; + + if (NewShiftAmt.getValueType() != MVT::i8) { + // Need to truncate the shift amount. 
+ NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt); + // Add to a correct topological ordering. + insertDAGNode(*CurDAG, N, NewShiftAmt); + } + + // Insert a new mask to keep the shift amount legal. This should be removed + // by isel patterns. + NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt, + CurDAG->getConstant(Size - 1, DL, MVT::i8)); + // Place in a correct topological ordering. + insertDAGNode(*CurDAG, N, NewShiftAmt); + + // Recapture operand 1 so we can delete it if its unused. + ShiftAmt = N->getOperand(1); + CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewShiftAmt); + + // If the original ShiftAmt is now dead, delete it so that we don't run + // it through isel. + if (ShiftAmt.getNode()->use_empty()) + CurDAG->RemoveDeadNode(ShiftAmt.getNode()); + + // Now that we've optimized the shift amount, defer to normal isel to get + // load folding and legacy vs BMI2 selection without repeating it here. + SelectCode(N); + return true; +} + /// If the high bits of an 'and' operand are known zero, try setting the /// high bits of an 'and' constant operand to produce a smaller encoding by /// creating a small, sign-extended negative immediate rather than a large @@ -2797,6 +2888,13 @@ return; } + case ISD::SRL: + case ISD::SRA: + case ISD::SHL: + if (tryShiftAmountMod(Node)) + return; + break; + case ISD::AND: if (matchBEXTRFromAnd(Node)) return; Index: test/CodeGen/X86/extract-lowbits.ll =================================================================== --- test/CodeGen/X86/extract-lowbits.ll +++ test/CodeGen/X86/extract-lowbits.ll @@ -1015,7 +1015,7 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_c0: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movl $32, %ecx +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1031,10 +1031,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_c0: ; 
X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %esi ; X64-NOBMI-NEXT: movl $-1, %eax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq @@ -1052,9 +1051,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_c1_indexzext: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movb $32, %cl +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl @@ -1067,9 +1067,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_c1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $32, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %sil ; X64-NOBMI-NEXT: movl $-1, %eax +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq @@ -1089,7 +1089,7 @@ ; X86-NOBMI-LABEL: bzhi32_c2_load: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movl $32, %ecx +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1106,10 +1106,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_c2_load: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %esi ; X64-NOBMI-NEXT: movl $-1, %eax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: andl (%rdi), %eax ; X64-NOBMI-NEXT: retq @@ -1129,9 +1128,10 @@ ; X86-NOBMI-LABEL: bzhi32_c3_load_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx 
-; X86-NOBMI-NEXT: movb $32, %cl +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: andl (%edx), %eax ; X86-NOBMI-NEXT: retl @@ -1145,9 +1145,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $32, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %sil ; X64-NOBMI-NEXT: movl $-1, %eax +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: andl (%rdi), %eax ; X64-NOBMI-NEXT: retq @@ -1167,7 +1167,7 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_c4_commutative: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movl $32, %ecx +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1183,10 +1183,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_c4_commutative: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %esi ; X64-NOBMI-NEXT: movl $-1, %eax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq @@ -1241,10 +1240,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_c0: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %esi ; X64-NOBMI-NEXT: movq $-1, %rax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1297,9 +1295,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_c1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $64, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %sil ; X64-NOBMI-NEXT: 
movq $-1, %rax +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1360,10 +1358,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_c2_load: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %esi ; X64-NOBMI-NEXT: movq $-1, %rax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: andq (%rdi), %rax ; X64-NOBMI-NEXT: retq @@ -1423,9 +1420,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $64, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %sil ; X64-NOBMI-NEXT: movq $-1, %rax +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: andq (%rdi), %rax ; X64-NOBMI-NEXT: retq @@ -1481,10 +1478,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_c4_commutative: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %esi ; X64-NOBMI-NEXT: movq $-1, %rax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1507,8 +1503,8 @@ ; X86-NOBMI-LABEL: bzhi32_d0: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movl $32, %ecx -; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: negl %ecx ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI-NEXT: shrl %cl, %eax @@ -1522,8 +1518,8 @@ ; ; X64-NOBMI-LABEL: bzhi32_d0: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shll %cl, %edi ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; 
X64-NOBMI-NEXT: shrl %cl, %edi @@ -1544,8 +1540,8 @@ ; X86-NOBMI-LABEL: bzhi32_d1_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movb $32, %cl -; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-NOBMI-NEXT: negb %cl ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl @@ -1558,9 +1554,10 @@ ; ; X64-NOBMI-LABEL: bzhi32_d1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $32, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shll %cl, %edi +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %edi ; X64-NOBMI-NEXT: movl %edi, %eax ; X64-NOBMI-NEXT: retq @@ -1579,10 +1576,10 @@ define i32 @bzhi32_d2_load(i32* %w, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_d2_load: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %eax -; X86-NOBMI-NEXT: movl $32, %ecx -; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx +; X86-NOBMI-NEXT: negl %ecx ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI-NEXT: shrl %cl, %eax @@ -1597,9 +1594,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_d2_load: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: movl (%rdi), %eax -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %eax @@ -1619,10 +1616,10 @@ define i32 @bzhi32_d3_load_indexzext(i32* %w, i8 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_d3_load_indexzext: ; X86-NOBMI: # %bb.0: +; X86-NOBMI-NEXT: movb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %eax -; X86-NOBMI-NEXT: movb 
$32, %cl -; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl +; X86-NOBMI-NEXT: negb %cl ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl @@ -1636,10 +1633,11 @@ ; ; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: movl (%rdi), %eax -; X64-NOBMI-NEXT: movb $32, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shll %cl, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; @@ -1731,10 +1729,10 @@ ; ; X64-NOBMI-LABEL: bzhi64_d0: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: movq %rsi, %rcx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shlq %cl, %rdi -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shrq %cl, %rdi ; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1823,9 +1821,10 @@ ; ; X64-NOBMI-LABEL: bzhi64_d1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $64, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shlq %cl, %rdi +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrq %cl, %rdi ; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1918,11 +1917,11 @@ ; ; X64-NOBMI-LABEL: bzhi64_d2_load: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rsi, %rcx ; X64-NOBMI-NEXT: movq (%rdi), %rax -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shlq %cl, %rax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; @@ -2013,10 +2012,11 @@ ; ; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: 
movl %esi, %ecx ; X64-NOBMI-NEXT: movq (%rdi), %rax -; X64-NOBMI-NEXT: movb $64, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shlq %cl, %rax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; Index: test/CodeGen/X86/schedule-x86-64-shld.ll =================================================================== --- test/CodeGen/X86/schedule-x86-64-shld.ll +++ test/CodeGen/X86/schedule-x86-64-shld.ll @@ -162,11 +162,10 @@ ; ; BTVER2-LABEL: lshift_cl: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] ; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx +; BTVER2-NEXT: negl %ecx # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx ; BTVER2-NEXT: shrq %cl, %rsi # sched: [1:0.50] ; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50] ; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50] @@ -174,11 +173,10 @@ ; ; BDVER1-LABEL: lshift_cl: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: shlq %cl, %rdi -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %edx, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx +; BDVER1-NEXT: negl %ecx +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx ; BDVER1-NEXT: shrq %cl, %rsi ; BDVER1-NEXT: orq %rdi, %rsi ; BDVER1-NEXT: movq %rsi, %rax @@ -236,11 +234,10 @@ ; ; BTVER2-LABEL: rshift_cl: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] ; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx +; BTVER2-NEXT: negl %ecx # 
sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx ; BTVER2-NEXT: shlq %cl, %rsi # sched: [1:0.50] ; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50] ; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50] @@ -248,11 +245,10 @@ ; ; BDVER1-LABEL: rshift_cl: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: shrq %cl, %rdi -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %edx, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx +; BDVER1-NEXT: negl %ecx +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx ; BDVER1-NEXT: shlq %cl, %rsi ; BDVER1-NEXT: orq %rdi, %rsi ; BDVER1-NEXT: movq %rsi, %rax @@ -310,11 +306,10 @@ ; BTVER2-LABEL: lshift_mem_cl: ; BTVER2: # %bb.0: # %entry ; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00] -; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50] +; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50] ; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %esi, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx +; BTVER2-NEXT: negl %ecx # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx ; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] ; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50] ; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00] @@ -322,12 +317,11 @@ ; ; BDVER1-LABEL: lshift_mem_cl: ; BDVER1: # %bb.0: # %entry +; BDVER1-NEXT: movq %rsi, %rcx ; BDVER1-NEXT: movq {{.*}}(%rip), %rax -; BDVER1-NEXT: movl %esi, %ecx ; BDVER1-NEXT: shlq %cl, %rax -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %esi, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx +; BDVER1-NEXT: negl %ecx +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx ; BDVER1-NEXT: shrq %cl, %rdi ; BDVER1-NEXT: orq %rax, %rdi ; BDVER1-NEXT: movq %rdi, {{.*}}(%rip)