Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -456,6 +456,7 @@
     bool matchBEXTRFromAnd(SDNode *Node);
     bool shrinkAndImmediate(SDNode *N);
     bool isMaskZeroExtended(SDNode *N) const;
+    bool tryShiftAmountMod(SDNode *N);
 
     MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
                                 const SDLoc &dl, MVT VT, SDNode *Node);
@@ -2690,6 +2691,107 @@
   return CNode;
 }
 
+/// Try to simplify the shift amount of a scalar SHL/SRL/SRA node by exploiting
+/// the fact that the hardware only looks at the low bits of the count. An
+/// add/sub of a multiple of the masked width is dropped, and (N - X) with N a
+/// non-zero multiple of that width becomes (0 - X). Returns true if \p N was
+/// rewritten and handed on to selection, false if nothing matched.
+bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
+  EVT VT = N->getValueType(0);
+
+  // Only handle scalar shifts.
+  if (VT.isVector())
+    return false;
+
+  // Narrower shifts only mask to 5 bits in hardware.
+  unsigned Size = VT == MVT::i64 ? 64 : 32;
+
+  SDValue OrigShiftAmt = N->getOperand(1);
+  SDValue ShiftAmt = OrigShiftAmt;
+  SDLoc DL(N);
+
+  // Skip over a truncate of the shift amount.
+  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
+    ShiftAmt = ShiftAmt->getOperand(0);
+
+  // Special case to avoid messing up a BZHI pattern.
+  // Look for (srl (shl X, (size - y)), (size - y))
+  if (Subtarget->hasBMI2() && (VT == MVT::i32 || VT == MVT::i64) &&
+      N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL &&
+      // Shift amounts the same?
+      N->getOperand(1) == N->getOperand(0).getOperand(1) &&
+      // Shift amounts size - y?
+      ShiftAmt.getOpcode() == ISD::SUB &&
+      isa<ConstantSDNode>(ShiftAmt.getOperand(0)) &&
+      cast<ConstantSDNode>(ShiftAmt.getOperand(0))->getZExtValue() == Size)
+    return false;
+
+  SDValue NewShiftAmt;
+  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
+    SDValue Add0 = ShiftAmt->getOperand(0);
+    SDValue Add1 = ShiftAmt->getOperand(1);
+    // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
+    // to avoid the ADD/SUB.
+    if (isa<ConstantSDNode>(Add1) &&
+        cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
+      NewShiftAmt = Add0;
+    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
+    // generate a NEG instead of a SUB of a constant.
+    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
+               isa<ConstantSDNode>(Add0) &&
+               cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
+               cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
+      // Insert a negate op.
+      // TODO: This isn't guaranteed to replace the sub if there is a logic cone
+      // that uses it that's not a shift.
+      EVT SubVT = ShiftAmt.getValueType();
+      SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
+      SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
+      NewShiftAmt = Neg;
+
+      // Insert these operands into a valid topological order so they can
+      // get selected independently.
+      insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
+      insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
+    } else
+      return false;
+  } else
+    return false;
+
+  if (NewShiftAmt.getValueType() != MVT::i8) {
+    // Need to truncate the shift amount.
+    NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
+    // Add to a correct topological ordering.
+    insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+  }
+
+  // Insert a new mask to keep the shift amount legal. This should be removed
+  // by isel patterns.
+  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
+                                CurDAG->getConstant(Size - 1, DL, MVT::i8));
+  // Place in a correct topological ordering.
+  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+
+  SDNode *UpdatedNode = CurDAG->UpdateNodeOperands(N, N->getOperand(0),
+                                                   NewShiftAmt);
+  if (UpdatedNode != N) {
+    // If we found an existing node, we should replace ourselves with that node
+    // and wait for it to be selected after its other users.
+    ReplaceNode(N, UpdatedNode);
+    return true;
+  }
+
+  // If the original shift amount is now dead, delete it so that we don't run
+  // it through isel.
+  if (OrigShiftAmt.getNode()->use_empty())
+    CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
+
+  // Now that we've optimized the shift amount, defer to normal isel to get
+  // load folding and legacy vs BMI2 selection without repeating it here.
+  SelectCode(N);
+  return true;
+}
+
 /// If the high bits of an 'and' operand are known zero, try setting the
 /// high bits of an 'and' constant operand to produce a smaller encoding by
 /// creating a small, sign-extended negative immediate rather than a large
@@ -2820,6 +2922,13 @@
     return;
   }
 
+  case ISD::SRL:
+  case ISD::SRA:
+  case ISD::SHL:
+    if (tryShiftAmountMod(Node))
+      return;
+    break;
+
   case ISD::AND:
     if (matchBEXTRFromAnd(Node))
       return;
Index: test/CodeGen/X86/clear-lowbits.ll =================================================================== --- test/CodeGen/X86/clear-lowbits.ll +++ test/CodeGen/X86/clear-lowbits.ll @@ -1108,7 +1108,7 @@ ; X86-NOBMI2-LABEL: clear_lowbits32_ic0: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI2-NEXT: movl $32, %ecx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1117,7 +1117,7 @@ ; ; X86-BMI2-LABEL: clear_lowbits32_ic0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movl $32, %eax +; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax @@ -1125,8 +1125,8 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits32_ic0: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movl $32, %ecx -; X64-NOBMI2-NEXT: subl %esi, %ecx +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: negl %ecx ; X64-NOBMI2-NEXT: shrl %cl, %edi ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shll %cl, %edi @@ -1135,10 +1135,9 @@ ; ; X64-BMI2-LABEL: clear_lowbits32_ic0: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $32, %eax -; X64-BMI2-NEXT: 
subl %esi, %eax -; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx -; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax +; X64-BMI2-NEXT: negl %esi +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits %mask = shl i32 -1, %numhighbits @@ -1150,15 +1149,16 @@ ; X86-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI2-NEXT: movb $32, %cl +; X86-NOBMI2-NEXT: xorl %ecx, %ecx ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: shrl %cl, %eax +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_lowbits32_ic1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movb $32, %al +; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax @@ -1166,19 +1166,19 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movb $32, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: negb %cl ; X64-NOBMI2-NEXT: shrl %cl, %edi +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shll %cl, %edi ; X64-NOBMI2-NEXT: movl %edi, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits32_ic1_indexzext: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $32, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx -; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: retq %numhighbits = sub i8 32, %numlowbits %sh_prom = zext i8 %numhighbits to i32 @@ -1192,7 +1192,7 @@ ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movl (%eax), %eax -; X86-NOBMI2-NEXT: movl $32, %ecx 
+; X86-NOBMI2-NEXT: xorl %ecx, %ecx ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1202,7 +1202,7 @@ ; X86-BMI2-LABEL: clear_lowbits32_ic2_load: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movl $32, %ecx +; X86-BMI2-NEXT: xorl %ecx, %ecx ; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: shrxl %ecx, (%eax), %eax ; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax @@ -1210,9 +1210,9 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits32_ic2_load: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: movl $32, %ecx -; X64-NOBMI2-NEXT: subl %esi, %ecx +; X64-NOBMI2-NEXT: negl %ecx ; X64-NOBMI2-NEXT: shrl %cl, %eax ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shll %cl, %eax @@ -1220,10 +1220,9 @@ ; ; X64-BMI2-LABEL: clear_lowbits32_ic2_load: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $32, %eax -; X64-BMI2-NEXT: subl %esi, %eax -; X64-BMI2-NEXT: shrxl %eax, (%rdi), %ecx -; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax +; X64-BMI2-NEXT: negl %esi +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: retq %val = load i32, i32* %w %numhighbits = sub i32 32, %numlowbits @@ -1237,16 +1236,17 @@ ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: movl (%eax), %eax -; X86-NOBMI2-NEXT: movb $32, %cl +; X86-NOBMI2-NEXT: xorl %ecx, %ecx ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: shrl %cl, %eax +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: movb $32, %cl +; X86-BMI2-NEXT: xorl %ecx, %ecx ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-BMI2-NEXT: shrxl %ecx, (%eax), 
%eax ; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax @@ -1254,19 +1254,19 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits32_ic3_load_indexzext: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movl (%rdi), %eax -; X64-NOBMI2-NEXT: movb $32, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: negb %cl ; X64-NOBMI2-NEXT: shrl %cl, %eax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $32, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: shrxl %eax, (%rdi), %ecx -; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: shrxl %esi, (%rdi), %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: retq %val = load i32, i32* %w %numhighbits = sub i8 32, %numlowbits @@ -1280,7 +1280,7 @@ ; X86-NOBMI2-LABEL: clear_lowbits32_ic4_commutative: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI2-NEXT: movl $32, %ecx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI2-NEXT: shrl %cl, %eax ; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1289,7 +1289,7 @@ ; ; X86-BMI2-LABEL: clear_lowbits32_ic4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movl $32, %eax +; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax @@ -1297,8 +1297,8 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits32_ic4_commutative: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movl $32, %ecx -; X64-NOBMI2-NEXT: subl %esi, %ecx +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: negl %ecx ; X64-NOBMI2-NEXT: shrl %cl, %edi ; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shll %cl, %edi @@ -1307,10 +1307,9 @@ ; ; X64-BMI2-LABEL: clear_lowbits32_ic4_commutative: ; 
X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $32, %eax -; X64-BMI2-NEXT: subl %esi, %eax -; X64-BMI2-NEXT: shrxl %eax, %edi, %ecx -; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax +; X64-BMI2-NEXT: negl %esi +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits %mask = shl i32 -1, %numhighbits @@ -1358,20 +1357,19 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic0: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movl $64, %ecx -; X64-NOBMI2-NEXT: subl %esi, %ecx +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: negl %ecx ; X64-NOBMI2-NEXT: shrq %cl, %rdi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shlq %cl, %rdi ; X64-NOBMI2-NEXT: movq %rdi, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_ic0: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $64, %eax -; X64-BMI2-NEXT: subl %esi, %eax -; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx -; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax +; X64-BMI2-NEXT: negl %esi +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax ; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %mask = shl i64 -1, %numhighbits @@ -1417,19 +1415,20 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movb $64, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: movl %esi, %ecx +; X64-NOBMI2-NEXT: negb %cl ; X64-NOBMI2-NEXT: shrq %cl, %rdi +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shlq %cl, %rdi ; X64-NOBMI2-NEXT: movq %rdi, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_ic1_indexzext: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $64, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx -; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: 
shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax ; X64-BMI2-NEXT: retq %numhighbits = sub i8 64, %numlowbits %sh_prom = zext i8 %numhighbits to i64 @@ -1482,20 +1481,19 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic2_load: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movq %rsi, %rcx ; X64-NOBMI2-NEXT: movq (%rdi), %rax -; X64-NOBMI2-NEXT: movl $64, %ecx -; X64-NOBMI2-NEXT: subl %esi, %ecx +; X64-NOBMI2-NEXT: negl %ecx ; X64-NOBMI2-NEXT: shrq %cl, %rax -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_ic2_load: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $64, %eax -; X64-BMI2-NEXT: subl %esi, %eax -; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rcx -; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax +; X64-BMI2-NEXT: negl %esi +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax ; X64-BMI2-NEXT: retq %val = load i64, i64* %w %numhighbits = sub i64 64, %numlowbits @@ -1548,19 +1546,20 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext: ; X64-NOBMI2: # %bb.0: +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: movq (%rdi), %rax -; X64-NOBMI2-NEXT: movb $64, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: negb %cl ; X64-NOBMI2-NEXT: shrq %cl, %rax +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $64, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: shrxq %rax, (%rdi), %rcx -; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: shrxq %rsi, (%rdi), %rax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax ; X64-BMI2-NEXT: retq %val = load i64, i64* %w %numhighbits = sub i8 64, %numlowbits @@ -1608,20 +1607,19 @@ ; ; 
X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movl $64, %ecx -; X64-NOBMI2-NEXT: subl %esi, %ecx +; X64-NOBMI2-NEXT: movq %rsi, %rcx +; X64-NOBMI2-NEXT: negl %ecx ; X64-NOBMI2-NEXT: shrq %cl, %rdi -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI2-NEXT: shlq %cl, %rdi ; X64-NOBMI2-NEXT: movq %rdi, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_ic4_commutative: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $64, %eax -; X64-BMI2-NEXT: subl %esi, %eax -; X64-BMI2-NEXT: shrxq %rax, %rdi, %rcx -; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax +; X64-BMI2-NEXT: negl %esi +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax ; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits %mask = shl i64 -1, %numhighbits Index: test/CodeGen/X86/extract-lowbits.ll =================================================================== --- test/CodeGen/X86/extract-lowbits.ll +++ test/CodeGen/X86/extract-lowbits.ll @@ -1016,7 +1016,7 @@ ; X86-NOBMI-LABEL: bzhi32_c0: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movl $32, %ecx +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1031,8 +1031,8 @@ ; ; X64-NOBMI-LABEL: bzhi32_c0: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shll %cl, %edi ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %edi @@ -1053,9 +1053,10 @@ ; X86-NOBMI-LABEL: bzhi32_c1_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movb $32, %cl +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: shll 
%cl, %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; @@ -1067,9 +1068,10 @@ ; ; X64-NOBMI-LABEL: bzhi32_c1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $32, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shll %cl, %edi +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %edi ; X64-NOBMI-NEXT: movl %edi, %eax ; X64-NOBMI-NEXT: retq @@ -1090,7 +1092,7 @@ ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %eax -; X86-NOBMI-NEXT: movl $32, %ecx +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1106,9 +1108,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_c2_load: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: movl (%rdi), %eax -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %eax @@ -1130,9 +1132,10 @@ ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %eax -; X86-NOBMI-NEXT: movb $32, %cl +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; @@ -1145,10 +1148,11 @@ ; ; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: movl (%rdi), %eax -; X64-NOBMI-NEXT: movb $32, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shll %cl, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %eax 
; X64-NOBMI-NEXT: retq ; @@ -1168,7 +1172,7 @@ ; X86-NOBMI-LABEL: bzhi32_c4_commutative: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movl $32, %ecx +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1183,8 +1187,8 @@ ; ; X64-NOBMI-LABEL: bzhi32_c4_commutative: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shll %cl, %edi ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %edi @@ -1241,10 +1245,10 @@ ; ; X64-NOBMI-LABEL: bzhi64_c0: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: movq %rsi, %rcx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shlq %cl, %rdi -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shrq %cl, %rdi ; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1297,9 +1301,10 @@ ; ; X64-NOBMI-LABEL: bzhi64_c1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $64, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shlq %cl, %rdi +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrq %cl, %rdi ; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1360,11 +1365,11 @@ ; ; X64-NOBMI-LABEL: bzhi64_c2_load: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rsi, %rcx ; X64-NOBMI-NEXT: movq (%rdi), %rax -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shlq %cl, %rax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shrq %cl, %rax ; 
X64-NOBMI-NEXT: retq ; @@ -1423,10 +1428,11 @@ ; ; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: movq (%rdi), %rax -; X64-NOBMI-NEXT: movb $64, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shlq %cl, %rax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; @@ -1481,10 +1487,10 @@ ; ; X64-NOBMI-LABEL: bzhi64_c4_commutative: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: movq %rsi, %rcx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shlq %cl, %rdi -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shrq %cl, %rdi ; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1507,7 +1513,7 @@ ; X86-NOBMI-LABEL: bzhi32_d0: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movl $32, %ecx +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1522,8 +1528,8 @@ ; ; X64-NOBMI-LABEL: bzhi32_d0: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shll %cl, %edi ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %edi @@ -1544,9 +1550,10 @@ ; X86-NOBMI-LABEL: bzhi32_d1_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movb $32, %cl +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; @@ -1558,9 +1565,10 @@ ; ; X64-NOBMI-LABEL: 
bzhi32_d1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $32, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shll %cl, %edi +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %edi ; X64-NOBMI-NEXT: movl %edi, %eax ; X64-NOBMI-NEXT: retq @@ -1581,7 +1589,7 @@ ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %eax -; X86-NOBMI-NEXT: movl $32, %ecx +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1597,9 +1605,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_d2_load: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: movl (%rdi), %eax -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %eax @@ -1621,9 +1629,10 @@ ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %eax -; X86-NOBMI-NEXT: movb $32, %cl +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; @@ -1636,10 +1645,11 @@ ; ; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: movl (%rdi), %eax -; X64-NOBMI-NEXT: movb $32, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shll %cl, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; @@ -1731,10 +1741,10 @@ ; ; X64-NOBMI-LABEL: bzhi64_d0: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, 
%ecx +; X64-NOBMI-NEXT: movq %rsi, %rcx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shlq %cl, %rdi -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shrq %cl, %rdi ; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1823,9 +1833,10 @@ ; ; X64-NOBMI-LABEL: bzhi64_d1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $64, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shlq %cl, %rdi +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrq %cl, %rdi ; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1918,11 +1929,11 @@ ; ; X64-NOBMI-LABEL: bzhi64_d2_load: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rsi, %rcx ; X64-NOBMI-NEXT: movq (%rdi), %rax -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shlq %cl, %rax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; @@ -2013,10 +2024,11 @@ ; ; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: movq (%rdi), %rax -; X64-NOBMI-NEXT: movb $64, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shlq %cl, %rax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; Index: test/CodeGen/X86/schedule-x86-64-shld.ll =================================================================== --- test/CodeGen/X86/schedule-x86-64-shld.ll +++ test/CodeGen/X86/schedule-x86-64-shld.ll @@ -162,11 +162,10 @@ ; ; BTVER2-LABEL: lshift_cl: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] ; BTVER2-NEXT: shlq %cl, %rdi # sched: 
[1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx +; BTVER2-NEXT: negl %ecx # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx ; BTVER2-NEXT: shrq %cl, %rsi # sched: [1:0.50] ; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50] ; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50] @@ -174,11 +173,10 @@ ; ; BDVER1-LABEL: lshift_cl: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: shlq %cl, %rdi -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %edx, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx +; BDVER1-NEXT: negl %ecx +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx ; BDVER1-NEXT: shrq %cl, %rsi ; BDVER1-NEXT: orq %rdi, %rsi ; BDVER1-NEXT: movq %rsi, %rax @@ -236,11 +234,10 @@ ; ; BTVER2-LABEL: rshift_cl: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] ; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx +; BTVER2-NEXT: negl %ecx # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx ; BTVER2-NEXT: shlq %cl, %rsi # sched: [1:0.50] ; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50] ; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50] @@ -248,11 +245,10 @@ ; ; BDVER1-LABEL: rshift_cl: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: shrq %cl, %rdi -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %edx, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx +; BDVER1-NEXT: negl %ecx +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx ; BDVER1-NEXT: shlq %cl, %rsi ; BDVER1-NEXT: orq %rdi, %rsi ; BDVER1-NEXT: movq %rsi, %rax @@ -310,11 +306,10 @@ ; BTVER2-LABEL: 
lshift_mem_cl: ; BTVER2: # %bb.0: # %entry ; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00] -; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50] +; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50] ; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %esi, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx +; BTVER2-NEXT: negl %ecx # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx ; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] ; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50] ; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00] @@ -322,12 +317,11 @@ ; ; BDVER1-LABEL: lshift_mem_cl: ; BDVER1: # %bb.0: # %entry +; BDVER1-NEXT: movq %rsi, %rcx ; BDVER1-NEXT: movq {{.*}}(%rip), %rax -; BDVER1-NEXT: movl %esi, %ecx ; BDVER1-NEXT: shlq %cl, %rax -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %esi, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx +; BDVER1-NEXT: negl %ecx +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx ; BDVER1-NEXT: shrq %cl, %rdi ; BDVER1-NEXT: orq %rax, %rdi ; BDVER1-NEXT: movq %rdi, {{.*}}(%rip)