Index: lib/Target/X86/X86ISelDAGToDAG.cpp
===================================================================
--- lib/Target/X86/X86ISelDAGToDAG.cpp
+++ lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -456,6 +456,7 @@
     bool matchBEXTRFromAnd(SDNode *Node);
     bool shrinkAndImmediate(SDNode *N);
     bool isMaskZeroExtended(SDNode *N) const;
+    bool tryShiftAmountMod(SDNode *N);
 
     MachineSDNode *emitPCMPISTR(unsigned ROpc, unsigned MOpc, bool MayFoldLoad,
                                 const SDLoc &dl, MVT VT, SDNode *Node);
@@ -2677,6 +2678,117 @@
   return CNode;
 }
 
+/// Simplify the shift amount of a scalar SHL/SRL/SRA node, relying on the
+/// hardware masking the amount modulo Size (64 for i64 shifts, 32 otherwise).
+///
+/// When the amount is an ADD/SUB (possibly behind a TRUNCATE):
+///   * X +/- C, with C % Size == 0            -->  X
+///   * C - X,   with C != 0 and C % Size == 0 -->  0 - X  (a NEG)
+///
+/// The new amount is truncated to i8 if necessary and wrapped in an AND with
+/// Size - 1 before being installed as operand 1 of \p N.
+///
+/// \returns true if \p N was rewritten and selected (or replaced by an
+/// identical pre-existing node); false if \p N was left untouched.
+bool X86DAGToDAGISel::tryShiftAmountMod(SDNode *N) {
+  EVT VT = N->getValueType(0);
+
+  // Only handle scalar shifts.
+  if (VT.isVector())
+    return false;
+
+  // Narrower shifts only mask to 5 bits in hardware.
+  unsigned Size = VT == MVT::i64 ? 64 : 32;
+
+  SDValue OrigShiftAmt = N->getOperand(1);
+  SDValue ShiftAmt = OrigShiftAmt;
+  SDLoc DL(N);
+
+  // Skip over a truncate of the shift amount.
+  if (ShiftAmt->getOpcode() == ISD::TRUNCATE)
+    ShiftAmt = ShiftAmt->getOperand(0);
+
+  // Special case to avoid messing up a BZHI pattern.
+  // Look for (srl (shl X, (size - y)), (size - y))
+  if (Subtarget->hasBMI2() && (VT == MVT::i32 || VT == MVT::i64) &&
+      N->getOpcode() == ISD::SRL && N->getOperand(0).getOpcode() == ISD::SHL &&
+      // Shift amounts the same?
+      N->getOperand(1) == N->getOperand(0).getOperand(1) &&
+      // Shift amounts size - y?
+      ShiftAmt.getOpcode() == ISD::SUB &&
+      isa<ConstantSDNode>(ShiftAmt.getOperand(0)) &&
+      cast<ConstantSDNode>(ShiftAmt.getOperand(0))->getZExtValue() == Size)
+    return false;
+
+  SDValue NewShiftAmt;
+  if (ShiftAmt->getOpcode() == ISD::ADD || ShiftAmt->getOpcode() == ISD::SUB) {
+    SDValue Add0 = ShiftAmt->getOperand(0);
+    SDValue Add1 = ShiftAmt->getOperand(1);
+    // If we are shifting by X+/-N where N == 0 mod Size, then just shift by X
+    // to avoid the ADD/SUB.
+    if (isa<ConstantSDNode>(Add1) &&
+        cast<ConstantSDNode>(Add1)->getZExtValue() % Size == 0) {
+      NewShiftAmt = Add0;
+    // If we are shifting by N-X where N == 0 mod Size, then just shift by -X to
+    // generate a NEG instead of a SUB of a constant.
+    } else if (ShiftAmt->getOpcode() == ISD::SUB &&
+               isa<ConstantSDNode>(Add0) &&
+               cast<ConstantSDNode>(Add0)->getZExtValue() != 0 &&
+               cast<ConstantSDNode>(Add0)->getZExtValue() % Size == 0) {
+      // Insert a negate op.
+      // TODO: This isn't guaranteed to replace the sub if there is a logic cone
+      // that uses it that's not a shift.
+      EVT SubVT = ShiftAmt.getValueType();
+      SDValue Zero = CurDAG->getConstant(0, DL, SubVT);
+      SDValue Neg = CurDAG->getNode(ISD::SUB, DL, SubVT, Zero, Add1);
+      NewShiftAmt = Neg;
+
+      // Insert these operands into a valid topological order so they can
+      // get selected independently.
+      insertDAGNode(*CurDAG, OrigShiftAmt, Zero);
+      insertDAGNode(*CurDAG, OrigShiftAmt, Neg);
+    } else
+      return false;
+  } else
+    return false;
+
+  if (NewShiftAmt.getValueType() != MVT::i8) {
+    // Need to truncate the shift amount.
+    NewShiftAmt = CurDAG->getNode(ISD::TRUNCATE, DL, MVT::i8, NewShiftAmt);
+    // Add to a correct topological ordering.
+    insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+  }
+
+  // Insert a new mask to keep the shift amount legal. This should be removed
+  // by isel patterns.
+  NewShiftAmt = CurDAG->getNode(ISD::AND, DL, MVT::i8, NewShiftAmt,
+                                CurDAG->getConstant(Size - 1, DL, MVT::i8));
+  // Place in a correct topological ordering.
+  insertDAGNode(*CurDAG, OrigShiftAmt, NewShiftAmt);
+
+  // UpdateNodeOperands mutates N in place unless an identical node already
+  // exists in the DAG, in which case N is left alone and that node is
+  // returned instead.
+  SDNode *UpdatedNode =
+      CurDAG->UpdateNodeOperands(N, N->getOperand(0), NewShiftAmt);
+  if (UpdatedNode != N) {
+    // An equivalent shift already exists: replace ourselves with it and let
+    // it be selected when isel reaches it through its other users.
+    ReplaceNode(N, UpdatedNode);
+    return true;
+  }
+
+  // If the original shift amount is now dead, delete it so that we don't run
+  // it through isel.
+  if (OrigShiftAmt.getNode()->use_empty())
+    CurDAG->RemoveDeadNode(OrigShiftAmt.getNode());
+
+  // Now that we've optimized the shift amount, defer to normal isel to get
+  // load folding and legacy vs BMI2 selection without repeating it here.
+  SelectCode(N);
+  return true;
+}
+
 /// If the high bits of an 'and' operand are known zero, try setting the
 /// high bits of an 'and' constant operand to produce a smaller encoding by
 /// creating a small, sign-extended negative immediate rather than a large
@@ -2797,6 +2909,13 @@
     return;
   }
 
+  case ISD::SRL:
+  case ISD::SRA:
+  case ISD::SHL:
+    if (tryShiftAmountMod(Node))
+      return;
+    break;
+
   case ISD::AND:
     if (matchBEXTRFromAnd(Node))
       return;
Index: test/CodeGen/X86/clear-lowbits.ll =================================================================== --- test/CodeGen/X86/clear-lowbits.ll +++ test/CodeGen/X86/clear-lowbits.ll @@ -1104,7 +1104,7 @@ define i32 @clear_lowbits32_ic0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits32_ic0: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movl $32, %ecx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1114,7 +1114,7 @@ ; ; X86-BMI2-LABEL: clear_lowbits32_ic0: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movl $32, %eax +; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl $-1, %ecx ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax @@ -1123,20 +1123,18 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits32_ic0: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movl $32, %ecx -; X64-NOBMI2-NEXT: subl %esi, %ecx +; X64-NOBMI2-NEXT: negl %esi ; X64-NOBMI2-NEXT: movl $-1, %eax -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: andl %edi, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits32_ic0: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $32, %eax -; X64-BMI2-NEXT: subl %esi, %eax -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax +; X64-BMI2-NEXT: negl %esi +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; 
X64-BMI2-NEXT: andl %edi, %eax ; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits @@ -1148,16 +1146,17 @@ define i32 @clear_lowbits32_ic1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movb $32, %cl +; X86-NOBMI2-NEXT: xorl %ecx, %ecx ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %eax +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI2-NEXT: retl ; ; X86-BMI2-LABEL: clear_lowbits32_ic1_indexzext: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movb $32, %al +; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl $-1, %ecx ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax @@ -1166,19 +1165,18 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits32_ic1_indexzext: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movb $32, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: negb %sil ; X64-NOBMI2-NEXT: movl $-1, %eax +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: andl %edi, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits32_ic1_indexzext: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $32, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: andl %edi, %eax ; X64-BMI2-NEXT: retq %numhighbits = sub i8 32, %numlowbits @@ -1192,7 +1190,7 @@ ; X86-NOBMI2-LABEL: clear_lowbits32_ic2_load: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movl $32, %ecx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1203,7 +1201,7 @@ ; X86-BMI2-LABEL: 
clear_lowbits32_ic2_load: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movl $32, %eax +; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %eax, %edx, %eax @@ -1212,20 +1210,18 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits32_ic2_load: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movl $32, %ecx -; X64-NOBMI2-NEXT: subl %esi, %ecx +; X64-NOBMI2-NEXT: negl %esi ; X64-NOBMI2-NEXT: movl $-1, %eax -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: andl (%rdi), %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits32_ic2_load: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $32, %eax -; X64-BMI2-NEXT: subl %esi, %eax -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax +; X64-BMI2-NEXT: negl %esi +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: andl (%rdi), %eax ; X64-BMI2-NEXT: retq %val = load i32, i32* %w @@ -1239,9 +1235,10 @@ ; X86-NOBMI2-LABEL: clear_lowbits32_ic3_load_indexzext: ; X86-NOBMI2: # %bb.0: ; X86-NOBMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI2-NEXT: movb $32, %cl +; X86-NOBMI2-NEXT: xorl %ecx, %ecx ; X86-NOBMI2-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI2-NEXT: movl $-1, %eax +; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI2-NEXT: shll %cl, %eax ; X86-NOBMI2-NEXT: andl (%edx), %eax ; X86-NOBMI2-NEXT: retl @@ -1249,7 +1246,7 @@ ; X86-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-BMI2-NEXT: movb $32, %al +; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: subb {{[0-9]+}}(%esp), %al ; X86-BMI2-NEXT: movl $-1, %edx ; X86-BMI2-NEXT: shlxl %eax, %edx, %eax @@ -1258,19 +1255,18 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits32_ic3_load_indexzext: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movb $32, 
%cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: negb %sil ; X64-NOBMI2-NEXT: movl $-1, %eax +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: andl (%rdi), %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits32_ic3_load_indexzext: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $32, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movl $-1, %eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: andl (%rdi), %eax ; X64-BMI2-NEXT: retq %val = load i32, i32* %w @@ -1284,7 +1280,7 @@ define i32 @clear_lowbits32_ic4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI2-LABEL: clear_lowbits32_ic4_commutative: ; X86-NOBMI2: # %bb.0: -; X86-NOBMI2-NEXT: movl $32, %ecx +; X86-NOBMI2-NEXT: xorl %ecx, %ecx ; X86-NOBMI2-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI2-NEXT: movl $-1, %eax ; X86-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1294,7 +1290,7 @@ ; ; X86-BMI2-LABEL: clear_lowbits32_ic4_commutative: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movl $32, %eax +; X86-BMI2-NEXT: xorl %eax, %eax ; X86-BMI2-NEXT: subl {{[0-9]+}}(%esp), %eax ; X86-BMI2-NEXT: movl $-1, %ecx ; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax @@ -1303,20 +1299,18 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits32_ic4_commutative: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movl $32, %ecx -; X64-NOBMI2-NEXT: subl %esi, %ecx +; X64-NOBMI2-NEXT: negl %esi ; X64-NOBMI2-NEXT: movl $-1, %eax -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: shll %cl, %eax ; X64-NOBMI2-NEXT: andl %edi, %eax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits32_ic4_commutative: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $32, %eax -; X64-BMI2-NEXT: subl %esi, %eax -; X64-BMI2-NEXT: movl $-1, %ecx -; X64-BMI2-NEXT: shlxl %eax, %ecx, %eax +; X64-BMI2-NEXT: negl %esi +; X64-BMI2-NEXT: movl $-1, 
%eax +; X64-BMI2-NEXT: shlxl %esi, %eax, %eax ; X64-BMI2-NEXT: andl %edi, %eax ; X64-BMI2-NEXT: retq %numhighbits = sub i32 32, %numlowbits @@ -1365,20 +1359,18 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic0: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movl $64, %ecx -; X64-NOBMI2-NEXT: subl %esi, %ecx +; X64-NOBMI2-NEXT: negl %esi ; X64-NOBMI2-NEXT: movq $-1, %rax -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: andq %rdi, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_ic0: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $64, %eax -; X64-BMI2-NEXT: subl %esi, %eax -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax +; X64-BMI2-NEXT: negl %esi +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax ; X64-BMI2-NEXT: andq %rdi, %rax ; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits @@ -1425,19 +1417,19 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic1_indexzext: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movb $64, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: negb %sil ; X64-NOBMI2-NEXT: movq $-1, %rax +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: andq %rdi, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_ic1_indexzext: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $64, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax ; X64-BMI2-NEXT: andq %rdi, %rax ; X64-BMI2-NEXT: retq %numhighbits = sub i8 64, %numlowbits @@ -1491,20 +1483,18 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic2_load: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movl $64, %ecx -; X64-NOBMI2-NEXT: subl %esi, %ecx +; X64-NOBMI2-NEXT: negl %esi ; X64-NOBMI2-NEXT: movq 
$-1, %rax -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: andq (%rdi), %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_ic2_load: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $64, %eax -; X64-BMI2-NEXT: subl %esi, %eax -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax +; X64-BMI2-NEXT: negl %esi +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax ; X64-BMI2-NEXT: andq (%rdi), %rax ; X64-BMI2-NEXT: retq %val = load i64, i64* %w @@ -1558,19 +1548,19 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic3_load_indexzext: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movb $64, %cl -; X64-NOBMI2-NEXT: subb %sil, %cl +; X64-NOBMI2-NEXT: negb %sil ; X64-NOBMI2-NEXT: movq $-1, %rax +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: andq (%rdi), %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_ic3_load_indexzext: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movb $64, %al -; X64-BMI2-NEXT: subb %sil, %al -; X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax +; X64-BMI2-NEXT: # kill: def $esi killed $esi def $rsi +; X64-BMI2-NEXT: negb %sil +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax ; X64-BMI2-NEXT: andq (%rdi), %rax ; X64-BMI2-NEXT: retq %val = load i64, i64* %w @@ -1619,20 +1609,18 @@ ; ; X64-NOBMI2-LABEL: clear_lowbits64_ic4_commutative: ; X64-NOBMI2: # %bb.0: -; X64-NOBMI2-NEXT: movl $64, %ecx -; X64-NOBMI2-NEXT: subl %esi, %ecx +; X64-NOBMI2-NEXT: negl %esi ; X64-NOBMI2-NEXT: movq $-1, %rax -; X64-NOBMI2-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI2-NEXT: movl %esi, %ecx ; X64-NOBMI2-NEXT: shlq %cl, %rax ; X64-NOBMI2-NEXT: andq %rdi, %rax ; X64-NOBMI2-NEXT: retq ; ; X64-BMI2-LABEL: clear_lowbits64_ic4_commutative: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $64, %eax -; X64-BMI2-NEXT: subl %esi, %eax -; 
X64-BMI2-NEXT: movq $-1, %rcx -; X64-BMI2-NEXT: shlxq %rax, %rcx, %rax +; X64-BMI2-NEXT: negl %esi +; X64-BMI2-NEXT: movq $-1, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax ; X64-BMI2-NEXT: andq %rdi, %rax ; X64-BMI2-NEXT: retq %numhighbits = sub i64 64, %numlowbits Index: test/CodeGen/X86/extract-lowbits.ll =================================================================== --- test/CodeGen/X86/extract-lowbits.ll +++ test/CodeGen/X86/extract-lowbits.ll @@ -1015,7 +1015,7 @@ define i32 @bzhi32_c0(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_c0: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movl $32, %ecx +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1031,10 +1031,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_c0: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %esi ; X64-NOBMI-NEXT: movl $-1, %eax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq @@ -1052,9 +1051,10 @@ define i32 @bzhi32_c1_indexzext(i32 %val, i8 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_c1_indexzext: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movb $32, %cl +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: retl @@ -1067,9 +1067,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_c1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $32, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %sil ; X64-NOBMI-NEXT: movl $-1, %eax +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq @@ -1089,7 
+1089,7 @@ ; X86-NOBMI-LABEL: bzhi32_c2_load: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movl $32, %ecx +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1106,10 +1106,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_c2_load: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %esi ; X64-NOBMI-NEXT: movl $-1, %eax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: andl (%rdi), %eax ; X64-NOBMI-NEXT: retq @@ -1129,9 +1128,10 @@ ; X86-NOBMI-LABEL: bzhi32_c3_load_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NOBMI-NEXT: movb $32, %cl +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: movl $-1, %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: andl (%edx), %eax ; X86-NOBMI-NEXT: retl @@ -1145,9 +1145,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_c3_load_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $32, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %sil ; X64-NOBMI-NEXT: movl $-1, %eax +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: andl (%rdi), %eax ; X64-NOBMI-NEXT: retq @@ -1167,7 +1167,7 @@ define i32 @bzhi32_c4_commutative(i32 %val, i32 %numlowbits) nounwind { ; X86-NOBMI-LABEL: bzhi32_c4_commutative: ; X86-NOBMI: # %bb.0: -; X86-NOBMI-NEXT: movl $32, %ecx +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: movl $-1, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1183,10 +1183,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_c4_commutative: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $32, %ecx -; 
X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %esi ; X64-NOBMI-NEXT: movl $-1, %eax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: andl %edi, %eax ; X64-NOBMI-NEXT: retq @@ -1241,10 +1240,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_c0: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %esi ; X64-NOBMI-NEXT: movq $-1, %rax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1297,9 +1295,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_c1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $64, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %sil ; X64-NOBMI-NEXT: movq $-1, %rax +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1360,10 +1358,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_c2_load: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %esi ; X64-NOBMI-NEXT: movq $-1, %rax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: andq (%rdi), %rax ; X64-NOBMI-NEXT: retq @@ -1423,9 +1420,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_c3_load_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $64, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %sil ; X64-NOBMI-NEXT: movq $-1, %rax +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: andq (%rdi), %rax ; X64-NOBMI-NEXT: retq @@ -1481,10 +1478,9 @@ ; ; X64-NOBMI-LABEL: bzhi64_c4_commutative: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %esi ; X64-NOBMI-NEXT: movq $-1, %rax -; X64-NOBMI-NEXT: # kill: 
def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: andq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1507,7 +1503,7 @@ ; X86-NOBMI-LABEL: bzhi32_d0: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movl $32, %ecx +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1522,8 +1518,8 @@ ; ; X64-NOBMI-LABEL: bzhi32_d0: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shll %cl, %edi ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %edi @@ -1544,9 +1540,10 @@ ; X86-NOBMI-LABEL: bzhi32_d1_indexzext: ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NOBMI-NEXT: movb $32, %cl +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; @@ -1558,9 +1555,10 @@ ; ; X64-NOBMI-LABEL: bzhi32_d1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $32, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shll %cl, %edi +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %edi ; X64-NOBMI-NEXT: movl %edi, %eax ; X64-NOBMI-NEXT: retq @@ -1581,7 +1579,7 @@ ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %eax -; X86-NOBMI-NEXT: movl $32, %ecx +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subl {{[0-9]+}}(%esp), %ecx ; X86-NOBMI-NEXT: shll %cl, %eax ; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx @@ -1597,9 +1595,9 @@ ; ; X64-NOBMI-LABEL: bzhi32_d2_load: ; 
X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: movl (%rdi), %eax -; X64-NOBMI-NEXT: movl $32, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shll %cl, %eax ; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %eax @@ -1621,9 +1619,10 @@ ; X86-NOBMI: # %bb.0: ; X86-NOBMI-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NOBMI-NEXT: movl (%eax), %eax -; X86-NOBMI-NEXT: movb $32, %cl +; X86-NOBMI-NEXT: xorl %ecx, %ecx ; X86-NOBMI-NEXT: subb {{[0-9]+}}(%esp), %cl ; X86-NOBMI-NEXT: shll %cl, %eax +; X86-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NOBMI-NEXT: shrl %cl, %eax ; X86-NOBMI-NEXT: retl ; @@ -1636,10 +1635,11 @@ ; ; X64-NOBMI-LABEL: bzhi32_d3_load_indexzext: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: movl (%rdi), %eax -; X64-NOBMI-NEXT: movb $32, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shll %cl, %eax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrl %cl, %eax ; X64-NOBMI-NEXT: retq ; @@ -1731,10 +1731,10 @@ ; ; X64-NOBMI-LABEL: bzhi64_d0: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: movq %rsi, %rcx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shlq %cl, %rdi -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shrq %cl, %rdi ; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1823,9 +1823,10 @@ ; ; X64-NOBMI-LABEL: bzhi64_d1_indexzext: ; X64-NOBMI: # %bb.0: -; X64-NOBMI-NEXT: movb $64, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: movl %esi, %ecx +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shlq %cl, %rdi +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrq %cl, %rdi ; X64-NOBMI-NEXT: movq %rdi, %rax ; X64-NOBMI-NEXT: retq @@ -1918,11 +1919,11 @@ ; ; 
X64-NOBMI-LABEL: bzhi64_d2_load: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movq %rsi, %rcx ; X64-NOBMI-NEXT: movq (%rdi), %rax -; X64-NOBMI-NEXT: movl $64, %ecx -; X64-NOBMI-NEXT: subl %esi, %ecx +; X64-NOBMI-NEXT: negl %ecx ; X64-NOBMI-NEXT: shlq %cl, %rax -; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $rcx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; @@ -2013,10 +2014,11 @@ ; ; X64-NOBMI-LABEL: bzhi64_d3_load_indexzext: ; X64-NOBMI: # %bb.0: +; X64-NOBMI-NEXT: movl %esi, %ecx ; X64-NOBMI-NEXT: movq (%rdi), %rax -; X64-NOBMI-NEXT: movb $64, %cl -; X64-NOBMI-NEXT: subb %sil, %cl +; X64-NOBMI-NEXT: negb %cl ; X64-NOBMI-NEXT: shlq %cl, %rax +; X64-NOBMI-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NOBMI-NEXT: shrq %cl, %rax ; X64-NOBMI-NEXT: retq ; Index: test/CodeGen/X86/schedule-x86-64-shld.ll =================================================================== --- test/CodeGen/X86/schedule-x86-64-shld.ll +++ test/CodeGen/X86/schedule-x86-64-shld.ll @@ -162,11 +162,10 @@ ; ; BTVER2-LABEL: lshift_cl: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] ; BTVER2-NEXT: shlq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx +; BTVER2-NEXT: negl %ecx # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx ; BTVER2-NEXT: shrq %cl, %rsi # sched: [1:0.50] ; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50] ; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50] @@ -174,11 +173,10 @@ ; ; BDVER1-LABEL: lshift_cl: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: shlq %cl, %rdi -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %edx, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx +; BDVER1-NEXT: negl %ecx +; 
BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx ; BDVER1-NEXT: shrq %cl, %rsi ; BDVER1-NEXT: orq %rdi, %rsi ; BDVER1-NEXT: movq %rsi, %rax @@ -236,11 +234,10 @@ ; ; BTVER2-LABEL: rshift_cl: ; BTVER2: # %bb.0: # %entry -; BTVER2-NEXT: movl %edx, %ecx # sched: [1:0.50] +; BTVER2-NEXT: movq %rdx, %rcx # sched: [1:0.50] ; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %edx, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx +; BTVER2-NEXT: negl %ecx # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx ; BTVER2-NEXT: shlq %cl, %rsi # sched: [1:0.50] ; BTVER2-NEXT: orq %rdi, %rsi # sched: [1:0.50] ; BTVER2-NEXT: movq %rsi, %rax # sched: [1:0.50] @@ -248,11 +245,10 @@ ; ; BDVER1-LABEL: rshift_cl: ; BDVER1: # %bb.0: # %entry -; BDVER1-NEXT: movl %edx, %ecx +; BDVER1-NEXT: movq %rdx, %rcx ; BDVER1-NEXT: shrq %cl, %rdi -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %edx, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx +; BDVER1-NEXT: negl %ecx +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx ; BDVER1-NEXT: shlq %cl, %rsi ; BDVER1-NEXT: orq %rdi, %rsi ; BDVER1-NEXT: movq %rsi, %rax @@ -310,11 +306,10 @@ ; BTVER2-LABEL: lshift_mem_cl: ; BTVER2: # %bb.0: # %entry ; BTVER2-NEXT: movq {{.*}}(%rip), %rax # sched: [5:1.00] -; BTVER2-NEXT: movl %esi, %ecx # sched: [1:0.50] +; BTVER2-NEXT: movq %rsi, %rcx # sched: [1:0.50] ; BTVER2-NEXT: shlq %cl, %rax # sched: [1:0.50] -; BTVER2-NEXT: movl $64, %ecx # sched: [1:0.50] -; BTVER2-NEXT: subl %esi, %ecx # sched: [1:0.50] -; BTVER2-NEXT: # kill: def $cl killed $cl killed $ecx +; BTVER2-NEXT: negl %ecx # sched: [1:0.50] +; BTVER2-NEXT: # kill: def $cl killed $cl killed $rcx ; BTVER2-NEXT: shrq %cl, %rdi # sched: [1:0.50] ; BTVER2-NEXT: orq %rax, %rdi # sched: [1:0.50] ; BTVER2-NEXT: movq %rdi, {{.*}}(%rip) # sched: [1:1.00] @@ -322,12 +317,11 @@ ; ; BDVER1-LABEL: lshift_mem_cl: ; BDVER1: # %bb.0: # 
%entry +; BDVER1-NEXT: movq %rsi, %rcx ; BDVER1-NEXT: movq {{.*}}(%rip), %rax -; BDVER1-NEXT: movl %esi, %ecx ; BDVER1-NEXT: shlq %cl, %rax -; BDVER1-NEXT: movl $64, %ecx -; BDVER1-NEXT: subl %esi, %ecx -; BDVER1-NEXT: # kill: def $cl killed $cl killed $ecx +; BDVER1-NEXT: negl %ecx +; BDVER1-NEXT: # kill: def $cl killed $cl killed $rcx ; BDVER1-NEXT: shrq %cl, %rdi ; BDVER1-NEXT: orq %rax, %rdi ; BDVER1-NEXT: movq %rdi, {{.*}}(%rip)