diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1018,6 +1018,9 @@ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + bool isDesirableToCommuteWithShift(const SDNode *N, + CombineLevel Level) const override; + /// Return true if the target has native support for /// the specified value type and it is 'desirable' to use the type for the /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -47435,7 +47435,134 @@ return DAG.getNode(ExtOpc, DL, VT, Mulh); } -static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) { +// Try re-ordering an `and` and `srl/shl` if it results in a better constant +// for the `and`. Note this only tries to optimize by re-ordering; other +// patterns like (and (shl x, 4), 240) -> (and (shl x, 4), 255) (for the movzbl) +// are handled elsewhere. +static SDValue +combineLogicalShiftWithAnd(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + // Only do this on the last DAG combine as it can interfere with other + // combines. This is also necessary to avoid an infinite loop between this + // and `DAGCombiner::visitShiftByConstant`. + if (!DCI.isAfterLegalizeDAG()) + return SDValue(); + + SDNode *ShiftOp, *AndOp; + SDValue RawVal; + assert(N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SHL || + N->getOpcode() == ISD::AND); + + // Get ShiftOp, AndOp and RawVal (RawVal being the value that is shifted, + // ignoring the `and`). + if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SHL) { + ShiftOp = N; + AndOp = N->getOperand(0).getNode(); + if (AndOp->getOpcode() != ISD::AND || !AndOp->hasOneUse()) + return SDValue(); + } else { + AndOp = N; + unsigned Idx; + for (Idx = 0; Idx < 2; ++Idx) { + ShiftOp = N->getOperand(Idx).getNode(); + if ((ShiftOp->getOpcode() == ISD::SRL || + ShiftOp->getOpcode() == ISD::SHL) && + ShiftOp->hasOneUse()) { + RawVal = ShiftOp->getOperand(0); + break; + } + } + if (Idx == 2) + return SDValue(); + } + + assert(ShiftOp->getOpcode() == ISD::SRL || ShiftOp->getOpcode() == ISD::SHL); + assert(AndOp->getOpcode() == ISD::AND); + + // Get the `and` mask and RawVal if we didn't get it earlier. + auto *AndC = dyn_cast<ConstantSDNode>(AndOp->getOperand(0)); + if (AndC == nullptr) { + AndC = dyn_cast<ConstantSDNode>(AndOp->getOperand(1)); + if (!RawVal) + RawVal = AndOp->getOperand(0); + } else if (!RawVal) { + RawVal = AndOp->getOperand(1); + } + EVT VT = RawVal.getValueType(); + // TODO: Makes sense to do this on vector types if it allows us to use a mask + // that's easier to create. + if (!VT.isScalarInteger()) + return SDValue(); + + SDLoc DL(N); + // Get the shift amount; only proceed if both the shift amount and the `and` + // mask are constant. + auto *ShiftC = dyn_cast<ConstantSDNode>(ShiftOp->getOperand(1)); + if (ShiftC == nullptr || AndC == nullptr) + return SDValue(); + + APInt AndMask = AndC->getAPIntValue(); + unsigned ShiftCnt = ShiftC->getZExtValue(); + + // If `AndMask` is already in a form suitable for `movl/movzwl/movzbl` then + // there is nothing to do.
+ unsigned MaskCnt = AndMask.getBitWidth() - AndMask.countLeadingZeros(); + + if (AndMask.isMask()) { + assert(MaskCnt == AndMask.countPopulation()); + if (MaskCnt >= 8 && isPowerOf2_32(MaskCnt)) + return SDValue(); + } + + for (unsigned MaskIdx = 0; MaskIdx < 2; ++MaskIdx) { + // Determine the mask if we swap the order of `srl/shl` and `and`. + APInt NewAndMask; + if (MaskIdx) { + if (N->getOpcode() == ISD::AND && ShiftOp->getOpcode() == ISD::SHL) + NewAndMask = AndMask.lshr(ShiftCnt); + else + break; + } else if (N->getOpcode() == ISD::AND || N->getOpcode() == ISD::SHL) { + // Will never be beneficial if we can't extend the mask. + if (!AndMask.isMask()) + continue; + + NewAndMask = + APInt::getAllOnes(ShiftCnt + MaskCnt).zext(AndMask.getBitWidth()); + } else { + NewAndMask = AndMask.lshr(ShiftCnt); + } + + // If we can build a mask that can be matched with `movl/movzwl/movzbl`, OR + // just shrink the mask (potentially getting a better encoding), then do so. + bool SwapOrder = false; + if (NewAndMask.isMask()) { + unsigned NewMaskCnt = NewAndMask.countPopulation(); + SwapOrder = isPowerOf2_32(NewMaskCnt) && NewMaskCnt >= 8; + } + if (!SwapOrder) + SwapOrder = + NewAndMask.getSignificantBits() < AndMask.getSignificantBits(); + if (!SwapOrder) + continue; + + SDValue ret; + if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SHL) + return DAG.getNode(ISD::AND, DL, VT, + DAG.getNode(N->getOpcode(), DL, VT, RawVal, + DAG.getConstant(ShiftCnt, DL, VT)), + DAG.getConstant(NewAndMask, DL, VT)); + + return DAG.getNode(ShiftOp->getOpcode(), DL, VT, + DAG.getNode(ISD::AND, DL, VT, RawVal, + DAG.getConstant(NewAndMask, DL, VT)), + DAG.getConstant(ShiftCnt, DL, VT)); + } + return SDValue(); +} + +static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); @@ -47477,6 +47604,9 @@ } } + if (SDValue V = combineLogicalShiftWithAnd(N, DAG, DCI)) + return V; + return SDValue(); } @@ -47539,10 +47669,6 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - EVT VT = N0.getValueType(); - if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget)) return V; @@ -47551,41 +47677,9 @@ if (!DCI.isAfterLegalizeDAG()) return SDValue(); - // Try to improve a sequence of srl (and X, C1), C2 by inverting the order. - // TODO: This is a generic DAG combine that became an x86-only combine to - // avoid shortcomings in other folds such as bswap, bit-test ('bt'), and - // and-not ('andn'). - if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) - return SDValue(); - - auto *ShiftC = dyn_cast<ConstantSDNode>(N1); - auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (!ShiftC || !AndC) - return SDValue(); - - // If we can shrink the constant mask below 8-bits or 32-bits, then this - // transform should reduce code size. It may also enable secondary transforms - // from improved known-bits analysis or instruction selection. - APInt MaskVal = AndC->getAPIntValue(); - - // If this can be matched by a zero extend, don't optimize.
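The decision logic above has two ways to win: the swapped mask either becomes a low mask whose width is a power of two of at least 8 bits (so it is matched by a plain `movzbl`/`movzwl`/`movl` zero-extend), or it simply has fewer significant bits than the original and encodes more compactly. The arithmetic it relies on is the usual shift/mask commutation identity; below is a minimal standalone C++ sketch of that identity (hypothetical helper names and sample values, not part of the patch), mirroring the `bextr64b` change in the BMI tests further down where `shrl $4` + `andl $4095` becomes `movzwl` + `shrl $4`:

```cpp
// Sketch only: masking before or after a logical right shift yields the same
// value when the pre-shift mask is the post-shift mask shifted back up, so the
// combine is free to pick whichever form has the cheaper `and` constant.
#include <cassert>
#include <cstdint>

static uint32_t shiftThenMask(uint32_t X) { return (X >> 4) & 0xFFFu; }  // shrl $4; andl $4095
static uint32_t maskThenShift(uint32_t X) { return (X & 0xFFFFu) >> 4; } // movzwl; shrl $4

int main() {
  const uint32_t Samples[] = {0u, 0x12345678u, 0xFFFFFFFFu, 0xFFF0u};
  for (uint32_t X : Samples)
    assert(shiftThenMask(X) == maskThenShift(X)); // 0xFFFF >> 4 == 0xFFF
  return 0;
}
```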
- if (MaskVal.isMask()) { - unsigned TO = MaskVal.countTrailingOnes(); - if (TO >= 8 && isPowerOf2_32(TO)) - return SDValue(); - } + if (SDValue V = combineLogicalShiftWithAnd(N, DAG, DCI)) + return V; - APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue()); - unsigned OldMaskSize = MaskVal.getMinSignedBits(); - unsigned NewMaskSize = NewMaskVal.getMinSignedBits(); - if ((OldMaskSize > 8 && NewMaskSize <= 8) || - (OldMaskSize > 32 && NewMaskSize <= 32)) { - // srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC) - SDLoc DL(N); - SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT); - SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1); - return DAG.getNode(ISD::AND, DL, VT, NewShift, NewMask); - } return SDValue(); } @@ -49033,6 +49127,9 @@ } } + if (SDValue V = combineLogicalShiftWithAnd(N, DAG, DCI)) + return V; + return SDValue(); } @@ -56085,7 +56182,7 @@ case X86ISD::SBB: return combineSBB(N, DAG); case X86ISD::ADC: return combineADC(N, DAG, DCI); case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget); - case ISD::SHL: return combineShiftLeft(N, DAG); + case ISD::SHL: return combineShiftLeft(N, DAG, DCI); case ISD::SRA: return combineShiftRightArithmetic(N, DAG, Subtarget); case ISD::SRL: return combineShiftRightLogical(N, DAG, DCI, Subtarget); case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget); @@ -56240,6 +56337,18 @@ return SDValue(); } +bool X86TargetLowering::isDesirableToCommuteWithShift( + const SDNode *N, CombineLevel Level) const { + if (Level < AfterLegalizeDAG) + return true; + + if (N->getOpcode() == ISD::SRL || N->getOpcode() == ISD::SHL) + if (N->getOperand(0).getOpcode() == ISD::AND) + return false; + + return true; +} + bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { if (!isTypeLegal(VT)) return false; diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll --- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll +++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll @@ -959,12 +959,12 @@ ; KNL-NEXT: orl %r9d, %r8d ; KNL-NEXT: andl $1, %r10d ; KNL-NEXT: shll $6, %r10d -; KNL-NEXT: andl $1, %r11d ; KNL-NEXT: shll $7, %r11d -; KNL-NEXT: orl %r10d, %r11d +; KNL-NEXT: movzbl %r11b, %ecx +; KNL-NEXT: orl %r10d, %ecx ; KNL-NEXT: andl $1, %ebx ; KNL-NEXT: shll $8, %ebx -; KNL-NEXT: orl %r11d, %ebx +; KNL-NEXT: orl %ecx, %ebx ; KNL-NEXT: andl $1, %r14d ; KNL-NEXT: shll $9, %r14d ; KNL-NEXT: orl %ebx, %r14d @@ -983,11 +983,11 @@ ; KNL-NEXT: andl $1, %edx ; KNL-NEXT: shll $14, %edx ; KNL-NEXT: orl %r13d, %edx -; KNL-NEXT: andl $1, %esi ; KNL-NEXT: shll $15, %esi -; KNL-NEXT: orl %edx, %esi -; KNL-NEXT: orl %ebp, %esi -; KNL-NEXT: movw %si, (%rax) +; KNL-NEXT: movzwl %si, %ecx +; KNL-NEXT: orl %edx, %ecx +; KNL-NEXT: orl %ebp, %ecx +; KNL-NEXT: movw %cx, (%rax) ; KNL-NEXT: popq %rbx ; KNL-NEXT: popq %r12 ; KNL-NEXT: popq %r13 @@ -1272,12 +1272,12 @@ ; SKX-NEXT: orl %r9d, %r8d ; SKX-NEXT: andl $1, %r10d ; SKX-NEXT: shll $6, %r10d -; SKX-NEXT: andl $1, %r11d ; SKX-NEXT: shll $7, %r11d -; SKX-NEXT: orl %r10d, %r11d +; SKX-NEXT: movzbl %r11b, %ecx +; SKX-NEXT: orl %r10d, %ecx ; SKX-NEXT: andl $1, %ebx ; SKX-NEXT: shll $8, %ebx -; SKX-NEXT: orl %r11d, %ebx +; SKX-NEXT: orl %ecx, %ebx ; SKX-NEXT: andl $1, %r14d ; SKX-NEXT: shll $9, %r14d ; SKX-NEXT: orl %ebx, %r14d @@ -1296,11 +1296,11 @@ ; SKX-NEXT: andl $1, %edx ; SKX-NEXT: shll $14, %edx ; SKX-NEXT: orl %r13d, %edx -; SKX-NEXT: andl $1, %esi ; SKX-NEXT: shll $15, %esi -; SKX-NEXT: orl %edx, %esi -; SKX-NEXT: orl %ebp, %esi 
-; SKX-NEXT: movw %si, (%rax) +; SKX-NEXT: movzwl %si, %ecx +; SKX-NEXT: orl %edx, %ecx +; SKX-NEXT: orl %ebp, %ecx +; SKX-NEXT: movw %cx, (%rax) ; SKX-NEXT: popq %rbx ; SKX-NEXT: popq %r12 ; SKX-NEXT: popq %r13 @@ -1551,84 +1551,85 @@ ; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kandw %k1, %k2, %k1 -; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; KNL_X32-NEXT: kmovw %k1, %ebx ; KNL_X32-NEXT: kshiftrw $1, %k0, %k1 -; KNL_X32-NEXT: kmovw %k1, %ebp +; KNL_X32-NEXT: kmovw %k1, %eax ; KNL_X32-NEXT: kshiftrw $2, %k0, %k1 -; KNL_X32-NEXT: kmovw %k1, %esi -; KNL_X32-NEXT: kshiftrw $3, %k0, %k1 ; KNL_X32-NEXT: kmovw %k1, %edi +; KNL_X32-NEXT: kshiftrw $3, %k0, %k1 +; KNL_X32-NEXT: kmovw %k1, %esi ; KNL_X32-NEXT: kshiftrw $4, %k0, %k1 ; KNL_X32-NEXT: kmovw %k1, %edx ; KNL_X32-NEXT: kshiftrw $5, %k0, %k1 ; KNL_X32-NEXT: kmovw %k1, %ecx ; KNL_X32-NEXT: kshiftrw $6, %k0, %k1 ; KNL_X32-NEXT: andl $1, %ebx -; KNL_X32-NEXT: movb %bl, 2(%eax) +; KNL_X32-NEXT: movb %bl, 2(%ebp) ; KNL_X32-NEXT: kmovw %k0, %ebx ; KNL_X32-NEXT: andl $1, %ebx -; KNL_X32-NEXT: andl $1, %ebp -; KNL_X32-NEXT: leal (%ebx,%ebp,2), %ebx -; KNL_X32-NEXT: kmovw %k1, %ebp +; KNL_X32-NEXT: andl $1, %eax +; KNL_X32-NEXT: leal (%ebx,%eax,2), %eax +; KNL_X32-NEXT: kmovw %k1, %ebx ; KNL_X32-NEXT: kshiftrw $7, %k0, %k1 +; KNL_X32-NEXT: andl $1, %edi +; KNL_X32-NEXT: leal (%eax,%edi,4), %edi +; KNL_X32-NEXT: kmovw %k1, %eax +; KNL_X32-NEXT: kshiftrw $8, %k0, %k1 ; KNL_X32-NEXT: andl $1, %esi -; KNL_X32-NEXT: leal (%ebx,%esi,4), %ebx +; KNL_X32-NEXT: leal (%edi,%esi,8), %edi ; KNL_X32-NEXT: kmovw %k1, %esi -; KNL_X32-NEXT: kshiftrw $8, %k0, %k1 -; KNL_X32-NEXT: andl $1, %edi -; KNL_X32-NEXT: leal (%ebx,%edi,8), %ebx -; KNL_X32-NEXT: kmovw %k1, %edi ; KNL_X32-NEXT: kshiftrw $9, %k0, %k1 ; KNL_X32-NEXT: andl $1, %edx ; KNL_X32-NEXT: shll $4, %edx -; KNL_X32-NEXT: orl %ebx, %edx -; KNL_X32-NEXT: kmovw %k1, %ebx +; KNL_X32-NEXT: orl %edi, %edx +; KNL_X32-NEXT: kmovw %k1, %edi ; KNL_X32-NEXT: kshiftrw $10, %k0, %k1 ; KNL_X32-NEXT: andl $1, %ecx ; KNL_X32-NEXT: shll $5, %ecx ; KNL_X32-NEXT: orl %edx, %ecx ; KNL_X32-NEXT: kmovw %k1, %edx ; KNL_X32-NEXT: kshiftrw $11, %k0, %k1 -; KNL_X32-NEXT: andl $1, %ebp -; KNL_X32-NEXT: shll $6, %ebp +; KNL_X32-NEXT: andl $1, %ebx +; KNL_X32-NEXT: shll $6, %ebx +; KNL_X32-NEXT: shll $7, %eax +; KNL_X32-NEXT: movzbl %al, %eax +; KNL_X32-NEXT: orl %ebx, %eax +; KNL_X32-NEXT: kmovw %k1, %ebx +; KNL_X32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_X32-NEXT: andl $1, %esi -; KNL_X32-NEXT: shll $7, %esi -; KNL_X32-NEXT: orl %ebp, %esi +; KNL_X32-NEXT: shll $8, %esi +; KNL_X32-NEXT: orl %eax, %esi ; KNL_X32-NEXT: kmovw %k1, %ebp -; KNL_X32-NEXT: kshiftrw $12, %k0, %k1 +; KNL_X32-NEXT: kshiftrw $13, %k0, %k1 ; KNL_X32-NEXT: andl $1, %edi -; KNL_X32-NEXT: shll $8, %edi +; KNL_X32-NEXT: shll $9, %edi ; KNL_X32-NEXT: orl %esi, %edi -; KNL_X32-NEXT: kmovw %k1, %esi -; KNL_X32-NEXT: kshiftrw $13, %k0, %k1 -; KNL_X32-NEXT: andl $1, %ebx -; KNL_X32-NEXT: shll $9, %ebx -; KNL_X32-NEXT: orl %edi, %ebx -; KNL_X32-NEXT: kmovw %k1, %edi +; KNL_X32-NEXT: kmovw %k1, %eax ; KNL_X32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_X32-NEXT: andl $1, %edx ; KNL_X32-NEXT: shll $10, %edx -; KNL_X32-NEXT: orl %ebx, %edx -; KNL_X32-NEXT: kmovw %k1, %ebx +; KNL_X32-NEXT: orl %edi, %edx +; KNL_X32-NEXT: kmovw %k1, %esi ; KNL_X32-NEXT: kshiftrw $15, %k0, %k0 ; KNL_X32-NEXT: orl %ecx, %edx ; KNL_X32-NEXT: kmovw %k0, %ecx +; KNL_X32-NEXT: andl $1, %ebx +; KNL_X32-NEXT: shll $11, %ebx ; 
KNL_X32-NEXT: andl $1, %ebp -; KNL_X32-NEXT: shll $11, %ebp +; KNL_X32-NEXT: shll $12, %ebp +; KNL_X32-NEXT: orl %ebx, %ebp +; KNL_X32-NEXT: andl $1, %eax +; KNL_X32-NEXT: shll $13, %eax +; KNL_X32-NEXT: orl %ebp, %eax ; KNL_X32-NEXT: andl $1, %esi -; KNL_X32-NEXT: shll $12, %esi -; KNL_X32-NEXT: orl %ebp, %esi -; KNL_X32-NEXT: andl $1, %edi -; KNL_X32-NEXT: shll $13, %edi -; KNL_X32-NEXT: orl %esi, %edi -; KNL_X32-NEXT: andl $1, %ebx -; KNL_X32-NEXT: shll $14, %ebx -; KNL_X32-NEXT: orl %edi, %ebx -; KNL_X32-NEXT: andl $1, %ecx +; KNL_X32-NEXT: shll $14, %esi +; KNL_X32-NEXT: orl %eax, %esi ; KNL_X32-NEXT: shll $15, %ecx -; KNL_X32-NEXT: orl %ebx, %ecx +; KNL_X32-NEXT: movzwl %cx, %ecx +; KNL_X32-NEXT: orl %esi, %ecx ; KNL_X32-NEXT: orl %edx, %ecx +; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; KNL_X32-NEXT: movw %cx, (%eax) ; KNL_X32-NEXT: addl $16, %esp ; KNL_X32-NEXT: popl %esi @@ -1913,12 +1914,12 @@ ; FASTISEL-NEXT: orl %r9d, %r8d ; FASTISEL-NEXT: andl $1, %r10d ; FASTISEL-NEXT: shll $6, %r10d -; FASTISEL-NEXT: andl $1, %r11d ; FASTISEL-NEXT: shll $7, %r11d -; FASTISEL-NEXT: orl %r10d, %r11d +; FASTISEL-NEXT: movzbl %r11b, %ecx +; FASTISEL-NEXT: orl %r10d, %ecx ; FASTISEL-NEXT: andl $1, %ebx ; FASTISEL-NEXT: shll $8, %ebx -; FASTISEL-NEXT: orl %r11d, %ebx +; FASTISEL-NEXT: orl %ecx, %ebx ; FASTISEL-NEXT: andl $1, %r14d ; FASTISEL-NEXT: shll $9, %r14d ; FASTISEL-NEXT: orl %ebx, %r14d @@ -1937,11 +1938,11 @@ ; FASTISEL-NEXT: andl $1, %edx ; FASTISEL-NEXT: shll $14, %edx ; FASTISEL-NEXT: orl %r13d, %edx -; FASTISEL-NEXT: andl $1, %esi ; FASTISEL-NEXT: shll $15, %esi -; FASTISEL-NEXT: orl %edx, %esi -; FASTISEL-NEXT: orl %ebp, %esi -; FASTISEL-NEXT: movw %si, (%rax) +; FASTISEL-NEXT: movzwl %si, %ecx +; FASTISEL-NEXT: orl %edx, %ecx +; FASTISEL-NEXT: orl %ebp, %ecx +; FASTISEL-NEXT: movw %cx, (%rax) ; FASTISEL-NEXT: popq %rbx ; FASTISEL-NEXT: popq %r12 ; FASTISEL-NEXT: popq %r13 diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll --- a/llvm/test/CodeGen/X86/bitreverse.ll +++ b/llvm/test/CodeGen/X86/bitreverse.ll @@ -513,38 +513,39 @@ define i4 @test_bitreverse_i4(i4 %a) { ; X86-LABEL: test_bitreverse_i4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: andb $8, %al -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: addb %cl, %dl -; X86-NEXT: andb $4, %dl -; X86-NEXT: movb %cl, %ah -; X86-NEXT: shlb $3, %ah -; X86-NEXT: andb $8, %ah -; X86-NEXT: orb %dl, %ah +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: andb $2, %cl +; X86-NEXT: addb %cl, %cl +; X86-NEXT: movl %eax, %edx +; X86-NEXT: andb $1, %dl +; X86-NEXT: shlb $3, %dl +; X86-NEXT: orb %cl, %dl +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shrb %cl ; X86-NEXT: andb $2, %cl -; X86-NEXT: orb %ah, %cl +; X86-NEXT: orb %dl, %cl ; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $1, %al ; X86-NEXT: orb %cl, %al ; X86-NEXT: retl ; ; X64-LABEL: test_bitreverse_i4: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $8, %al -; X64-NEXT: leal (%rdi,%rdi), %ecx -; X64-NEXT: andb $4, %cl -; X64-NEXT: leal (,%rdi,8), %edx -; X64-NEXT: andb $8, %dl -; X64-NEXT: orb %cl, %dl -; X64-NEXT: shrb %dil -; X64-NEXT: andb $2, %dil -; X64-NEXT: orb %dil, %dl -; X64-NEXT: shrb $3, %al -; X64-NEXT: orb %dl, %al +; X64-NEXT: andb $2, %al +; X64-NEXT: addb %al, %al +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: andb $1, %cl +; X64-NEXT: shlb $3, %cl +; X64-NEXT: orb %al, %cl +; X64-NEXT: movl %edi, %eax +; 
X64-NEXT: shrb %al +; X64-NEXT: andb $2, %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: shrb $3, %dil +; X64-NEXT: andb $1, %dil +; X64-NEXT: orb %dil, %al ; X64-NEXT: retq ; ; X86XOP-LABEL: test_bitreverse_i4: @@ -558,19 +559,20 @@ ; ; GFNI-LABEL: test_bitreverse_i4: ; GFNI: # %bb.0: -; GFNI-NEXT: # kill: def $edi killed $edi def $rdi ; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andb $8, %al -; GFNI-NEXT: leal (%rdi,%rdi), %ecx -; GFNI-NEXT: andb $4, %cl -; GFNI-NEXT: leal (,%rdi,8), %edx -; GFNI-NEXT: andb $8, %dl -; GFNI-NEXT: orb %cl, %dl -; GFNI-NEXT: shrb %dil -; GFNI-NEXT: andb $2, %dil -; GFNI-NEXT: orb %dil, %dl -; GFNI-NEXT: shrb $3, %al -; GFNI-NEXT: orb %dl, %al +; GFNI-NEXT: andb $2, %al +; GFNI-NEXT: addb %al, %al +; GFNI-NEXT: movl %edi, %ecx +; GFNI-NEXT: andb $1, %cl +; GFNI-NEXT: shlb $3, %cl +; GFNI-NEXT: orb %al, %cl +; GFNI-NEXT: movl %edi, %eax +; GFNI-NEXT: shrb %al +; GFNI-NEXT: andb $2, %al +; GFNI-NEXT: orb %cl, %al +; GFNI-NEXT: shrb $3, %dil +; GFNI-NEXT: andb $1, %dil +; GFNI-NEXT: orb %dil, %al ; GFNI-NEXT: retq %b = call i4 @llvm.bitreverse.i4(i4 %a) ret i4 %b diff --git a/llvm/test/CodeGen/X86/bmi-x86_64.ll b/llvm/test/CodeGen/X86/bmi-x86_64.ll --- a/llvm/test/CodeGen/X86/bmi-x86_64.ll +++ b/llvm/test/CodeGen/X86/bmi-x86_64.ll @@ -16,18 +16,11 @@ } define i64 @bextr64b(i64 %x) uwtable ssp { -; BEXTR-SLOW-LABEL: bextr64b: -; BEXTR-SLOW: # %bb.0: -; BEXTR-SLOW-NEXT: movq %rdi, %rax -; BEXTR-SLOW-NEXT: shrl $4, %eax -; BEXTR-SLOW-NEXT: andl $4095, %eax # imm = 0xFFF -; BEXTR-SLOW-NEXT: retq -; -; BEXTR-FAST-LABEL: bextr64b: -; BEXTR-FAST: # %bb.0: -; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04 -; BEXTR-FAST-NEXT: bextrl %eax, %edi, %eax -; BEXTR-FAST-NEXT: retq +; CHECK-LABEL: bextr64b: +; CHECK: # %bb.0: +; CHECK-NEXT: movzwl %di, %eax +; CHECK-NEXT: shrl $4, %eax +; CHECK-NEXT: retq %1 = lshr i64 %x, 4 %2 = and i64 %1, 4095 ret i64 %2 @@ -46,18 +39,11 @@ } define i64 @bextr64b_load(ptr %x) { -; BEXTR-SLOW-LABEL: bextr64b_load: -; BEXTR-SLOW: # %bb.0: -; BEXTR-SLOW-NEXT: movl (%rdi), %eax -; BEXTR-SLOW-NEXT: shrl $4, %eax -; BEXTR-SLOW-NEXT: andl $4095, %eax # imm = 0xFFF -; BEXTR-SLOW-NEXT: retq -; -; BEXTR-FAST-LABEL: bextr64b_load: -; BEXTR-FAST: # %bb.0: -; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04 -; BEXTR-FAST-NEXT: bextrl %eax, (%rdi), %eax -; BEXTR-FAST-NEXT: retq +; CHECK-LABEL: bextr64b_load: +; CHECK: # %bb.0: +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: shrl $4, %eax +; CHECK-NEXT: retq %1 = load i64, ptr %x, align 8 %2 = lshr i64 %1, 4 %3 = and i64 %2, 4095 @@ -142,3 +128,5 @@ %and = and i64 %shr, 8589934590 ret i64 %and } +;; NOTE: These prefixes are unused and the list is autogenerated. 
Do not add tests below this line: +; BEXTR-SLOW: {{.*}} diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll --- a/llvm/test/CodeGen/X86/bmi.ll +++ b/llvm/test/CodeGen/X86/bmi.ll @@ -364,32 +364,49 @@ } define i32 @bextr32b(i32 %x) uwtable ssp { -; X86-SLOW-BEXTR-LABEL: bextr32b: +; X86-LABEL: bextr32b: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $4, %eax +; X86-NEXT: retl +; +; X64-LABEL: bextr32b: +; X64: # %bb.0: +; X64-NEXT: movzwl %di, %eax +; X64-NEXT: shrl $4, %eax +; X64-NEXT: retq + %1 = lshr i32 %x, 4 + %2 = and i32 %1, 4095 + ret i32 %2 +} + +define i32 @bextr32b_no_mov(i32 %x) uwtable ssp { +; X86-SLOW-BEXTR-LABEL: bextr32b_no_mov: ; X86-SLOW-BEXTR: # %bb.0: ; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SLOW-BEXTR-NEXT: shrl $4, %eax +; X86-SLOW-BEXTR-NEXT: shrl $3, %eax ; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF ; X86-SLOW-BEXTR-NEXT: retl ; -; X64-SLOW-BEXTR-LABEL: bextr32b: +; X64-SLOW-BEXTR-LABEL: bextr32b_no_mov: ; X64-SLOW-BEXTR: # %bb.0: ; X64-SLOW-BEXTR-NEXT: movl %edi, %eax -; X64-SLOW-BEXTR-NEXT: shrl $4, %eax +; X64-SLOW-BEXTR-NEXT: shrl $3, %eax ; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF ; X64-SLOW-BEXTR-NEXT: retq ; -; X86-FAST-BEXTR-LABEL: bextr32b: +; X86-FAST-BEXTR-LABEL: bextr32b_no_mov: ; X86-FAST-BEXTR: # %bb.0: -; X86-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X86-FAST-BEXTR-NEXT: movl $3075, %eax # imm = 0xC03 ; X86-FAST-BEXTR-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-FAST-BEXTR-NEXT: retl ; -; X64-FAST-BEXTR-LABEL: bextr32b: +; X64-FAST-BEXTR-LABEL: bextr32b_no_mov: ; X64-FAST-BEXTR: # %bb.0: -; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X64-FAST-BEXTR-NEXT: movl $3075, %eax # imm = 0xC03 ; X64-FAST-BEXTR-NEXT: bextrl %eax, %edi, %eax ; X64-FAST-BEXTR-NEXT: retq - %1 = lshr i32 %x, 4 + %1 = lshr i32 %x, 3 %2 = and i32 %1, 4095 ret i32 %2 } @@ -412,35 +429,54 @@ } define i32 @bextr32b_load(ptr %x) uwtable ssp { -; X86-SLOW-BEXTR-LABEL: bextr32b_load: +; X86-LABEL: bextr32b_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax +; X86-NEXT: shrl $4, %eax +; X86-NEXT: retl +; +; X64-LABEL: bextr32b_load: +; X64: # %bb.0: +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: shrl $4, %eax +; X64-NEXT: retq + %1 = load i32, ptr %x + %2 = lshr i32 %1, 4 + %3 = and i32 %2, 4095 + ret i32 %3 +} + +define i32 @bextr32_load_no_mov(ptr %x) uwtable ssp { +; X86-SLOW-BEXTR-LABEL: bextr32_load_no_mov: ; X86-SLOW-BEXTR: # %bb.0: ; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SLOW-BEXTR-NEXT: movl (%eax), %eax -; X86-SLOW-BEXTR-NEXT: shrl $4, %eax +; X86-SLOW-BEXTR-NEXT: shrl $3, %eax ; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF ; X86-SLOW-BEXTR-NEXT: retl ; -; X64-SLOW-BEXTR-LABEL: bextr32b_load: +; X64-SLOW-BEXTR-LABEL: bextr32_load_no_mov: ; X64-SLOW-BEXTR: # %bb.0: ; X64-SLOW-BEXTR-NEXT: movl (%rdi), %eax -; X64-SLOW-BEXTR-NEXT: shrl $4, %eax +; X64-SLOW-BEXTR-NEXT: shrl $3, %eax ; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF ; X64-SLOW-BEXTR-NEXT: retq ; -; X86-FAST-BEXTR-LABEL: bextr32b_load: +; X86-FAST-BEXTR-LABEL: bextr32_load_no_mov: ; X86-FAST-BEXTR: # %bb.0: ; X86-FAST-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-FAST-BEXTR-NEXT: movl $3076, %ecx # imm = 0xC04 +; X86-FAST-BEXTR-NEXT: movl $3075, %ecx # imm = 0xC03 ; X86-FAST-BEXTR-NEXT: bextrl %ecx, (%eax), %eax ; X86-FAST-BEXTR-NEXT: retl ; -; X64-FAST-BEXTR-LABEL: bextr32b_load: +; X64-FAST-BEXTR-LABEL: bextr32_load_no_mov: ; 
X64-FAST-BEXTR: # %bb.0: -; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X64-FAST-BEXTR-NEXT: movl $3075, %eax # imm = 0xC03 ; X64-FAST-BEXTR-NEXT: bextrl %eax, (%rdi), %eax ; X64-FAST-BEXTR-NEXT: retq %1 = load i32, ptr %x - %2 = lshr i32 %1, 4 + %2 = lshr i32 %1, 3 %3 = and i32 %2, 4095 ret i32 %3 } @@ -518,10 +554,10 @@ ; X86-LABEL: blsi32_z: ; X86: # %bb.0: ; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax -; X86-NEXT: jne .LBB25_2 +; X86-NEXT: jne .LBB27_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: .LBB25_2: +; X86-NEXT: .LBB27_2: ; X86-NEXT: retl ; ; X64-LABEL: blsi32_z: @@ -625,11 +661,11 @@ ; X86-NEXT: andl %ecx, %eax ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: orl %edx, %ecx -; X86-NEXT: jne .LBB29_2 +; X86-NEXT: jne .LBB31_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: .LBB29_2: +; X86-NEXT: .LBB31_2: ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -750,10 +786,10 @@ ; X86-LABEL: blsmsk32_z: ; X86: # %bb.0: ; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax -; X86-NEXT: jne .LBB34_2 +; X86-NEXT: jne .LBB36_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: .LBB34_2: +; X86-NEXT: .LBB36_2: ; X86-NEXT: retl ; ; X64-LABEL: blsmsk32_z: @@ -855,11 +891,11 @@ ; X86-NEXT: xorl %esi, %edx ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: orl %edx, %ecx -; X86-NEXT: jne .LBB38_2 +; X86-NEXT: jne .LBB40_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: .LBB38_2: +; X86-NEXT: .LBB40_2: ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -980,10 +1016,10 @@ ; X86-LABEL: blsr32_z: ; X86: # %bb.0: ; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax -; X86-NEXT: jne .LBB43_2 +; X86-NEXT: jne .LBB45_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: .LBB43_2: +; X86-NEXT: .LBB45_2: ; X86-NEXT: retl ; ; X64-LABEL: blsr32_z: @@ -1085,11 +1121,11 @@ ; X86-NEXT: andl %esi, %edx ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: orl %edx, %ecx -; X86-NEXT: jne .LBB47_2 +; X86-NEXT: jne .LBB49_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: .LBB47_2: +; X86-NEXT: .LBB49_2: ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -1223,20 +1259,20 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-NEXT: testl %eax, %eax -; X86-NEXT: js .LBB52_1 +; X86-NEXT: js .LBB54_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: jmp bar # TAILCALL -; X86-NEXT: .LBB52_1: +; X86-NEXT: .LBB54_1: ; X86-NEXT: retl ; ; X64-LABEL: pr40060: ; X64: # %bb.0: ; X64-NEXT: bextrl %esi, %edi, %eax ; X64-NEXT: testl %eax, %eax -; X64-NEXT: js .LBB52_1 +; X64-NEXT: js .LBB54_1 ; X64-NEXT: # %bb.2: ; X64-NEXT: jmp bar # TAILCALL -; X64-NEXT: .LBB52_1: +; X64-NEXT: .LBB54_1: ; X64-NEXT: retq %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %0, i32 %1) %4 = icmp sgt i32 %3, -1 @@ -1255,10 +1291,10 @@ ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: blsrl {{[0-9]+}}(%esp), %esi -; X86-NEXT: jne .LBB53_2 +; X86-NEXT: jne .LBB55_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: calll bar -; X86-NEXT: .LBB53_2: +; X86-NEXT: .LBB55_2: ; X86-NEXT: movl %esi, %eax ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 @@ -1270,10 +1306,10 @@ ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: .cfi_offset %rbx, -16 ; X64-NEXT: blsrl %edi, %ebx -; X64-NEXT: jne .LBB53_2 +; X64-NEXT: jne .LBB55_2 ; X64-NEXT: # %bb.1: ; X64-NEXT: callq bar -; X64-NEXT: .LBB53_2: +; X64-NEXT: .LBB55_2: ; 
X64-NEXT: movl %ebx, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: .cfi_def_cfa_offset 8 @@ -1307,10 +1343,10 @@ ; X86-NEXT: andl %ecx, %edi ; X86-NEXT: movl %esi, %eax ; X86-NEXT: orl %edi, %eax -; X86-NEXT: jne .LBB54_2 +; X86-NEXT: jne .LBB56_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: calll bar -; X86-NEXT: .LBB54_2: +; X86-NEXT: .LBB56_2: ; X86-NEXT: movl %esi, %eax ; X86-NEXT: movl %edi, %edx ; X86-NEXT: popl %esi @@ -1325,10 +1361,10 @@ ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: .cfi_offset %rbx, -16 ; X64-NEXT: blsrq %rdi, %rbx -; X64-NEXT: jne .LBB54_2 +; X64-NEXT: jne .LBB56_2 ; X64-NEXT: # %bb.1: ; X64-NEXT: callq bar -; X64-NEXT: .LBB54_2: +; X64-NEXT: .LBB56_2: ; X64-NEXT: movq %rbx, %rax ; X64-NEXT: popq %rbx ; X64-NEXT: .cfi_def_cfa_offset 8 @@ -1350,10 +1386,10 @@ ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: blsil {{[0-9]+}}(%esp), %esi -; X86-NEXT: jne .LBB55_2 +; X86-NEXT: jne .LBB57_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: calll bar -; X86-NEXT: .LBB55_2: +; X86-NEXT: .LBB57_2: ; X86-NEXT: movl %esi, %eax ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 @@ -1365,10 +1401,10 @@ ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: .cfi_offset %rbx, -16 ; X64-NEXT: blsil %edi, %ebx -; X64-NEXT: jne .LBB55_2 +; X64-NEXT: jne .LBB57_2 ; X64-NEXT: # %bb.1: ; X64-NEXT: callq bar -; X64-NEXT: .LBB55_2: +; X64-NEXT: .LBB57_2: ; X64-NEXT: movl %ebx, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: .cfi_def_cfa_offset 8 @@ -1402,10 +1438,10 @@ ; X86-NEXT: andl %eax, %edi ; X86-NEXT: movl %edi, %eax ; X86-NEXT: orl %esi, %eax -; X86-NEXT: jne .LBB56_2 +; X86-NEXT: jne .LBB58_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: calll bar -; X86-NEXT: .LBB56_2: +; X86-NEXT: .LBB58_2: ; X86-NEXT: movl %edi, %eax ; X86-NEXT: movl %esi, %edx ; X86-NEXT: popl %esi @@ -1420,10 +1456,10 @@ ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: .cfi_offset %rbx, -16 ; X64-NEXT: blsiq %rdi, %rbx -; X64-NEXT: jne .LBB56_2 +; X64-NEXT: jne .LBB58_2 ; X64-NEXT: # %bb.1: ; X64-NEXT: callq bar -; X64-NEXT: .LBB56_2: +; X64-NEXT: .LBB58_2: ; X64-NEXT: movq %rbx, %rax ; X64-NEXT: popq %rbx ; X64-NEXT: .cfi_def_cfa_offset 8 @@ -1444,19 +1480,19 @@ ; X86-LABEL: pr42118_i32: ; X86: # %bb.0: ; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax -; X86-NEXT: jne .LBB57_1 +; X86-NEXT: jne .LBB59_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: jmp bar # TAILCALL -; X86-NEXT: .LBB57_1: +; X86-NEXT: .LBB59_1: ; X86-NEXT: retl ; ; X64-LABEL: pr42118_i32: ; X64: # %bb.0: ; X64-NEXT: blsrl %edi, %eax -; X64-NEXT: jne .LBB57_1 +; X64-NEXT: jne .LBB59_1 ; X64-NEXT: # %bb.2: ; X64-NEXT: jmp bar # TAILCALL -; X64-NEXT: .LBB57_1: +; X64-NEXT: .LBB59_1: ; X64-NEXT: retq %tmp = sub i32 0, %x %tmp1 = and i32 %tmp, %x @@ -1484,12 +1520,12 @@ ; X86-NEXT: andl %eax, %edx ; X86-NEXT: andl %ecx, %esi ; X86-NEXT: orl %edx, %esi -; X86-NEXT: jne .LBB58_1 +; X86-NEXT: jne .LBB60_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: jmp bar # TAILCALL -; X86-NEXT: .LBB58_1: +; X86-NEXT: .LBB60_1: ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 @@ -1498,10 +1534,10 @@ ; X64-LABEL: pr42118_i64: ; X64: # %bb.0: ; X64-NEXT: blsrq %rdi, %rax -; X64-NEXT: jne .LBB58_1 +; X64-NEXT: jne .LBB60_1 ; X64-NEXT: # %bb.2: ; X64-NEXT: jmp bar # TAILCALL -; X64-NEXT: .LBB58_1: +; X64-NEXT: .LBB60_1: ; X64-NEXT: retq %tmp = sub i64 0, %x %tmp1 = and i64 %tmp, %x @@ -1519,11 +1555,11 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: testl %eax, %eax -; X86-NEXT: jne .LBB59_1 +; X86-NEXT: jne 
.LBB61_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl -; X86-NEXT: .LBB59_1: +; X86-NEXT: .LBB61_1: ; X86-NEXT: blsil %eax, %eax ; X86-NEXT: retl ; @@ -1552,15 +1588,15 @@ ; X86-NEXT: sbbl %esi, %edx ; X86-NEXT: movl %ecx, %edi ; X86-NEXT: orl %esi, %edi -; X86-NEXT: jne .LBB60_1 +; X86-NEXT: jne .LBB62_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: jmp .LBB60_3 -; X86-NEXT: .LBB60_1: +; X86-NEXT: jmp .LBB62_3 +; X86-NEXT: .LBB62_1: ; X86-NEXT: andl %esi, %edx ; X86-NEXT: andl %ecx, %eax -; X86-NEXT: .LBB60_3: +; X86-NEXT: .LBB62_3: ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/btc_bts_btr.ll b/llvm/test/CodeGen/X86/btc_bts_btr.ll --- a/llvm/test/CodeGen/X86/btc_bts_btr.ll +++ b/llvm/test/CodeGen/X86/btc_bts_btr.ll @@ -983,9 +983,10 @@ ; ; X86-LABEL: bts_32_mask_zeros: ; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $7, %cl ; X86-NEXT: shlb $2, %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: btsl %ecx, %eax ; X86-NEXT: retl %1 = shl i32 %n, 2 @@ -1005,9 +1006,10 @@ ; ; X86-LABEL: btc_32_mask_zeros: ; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: andb $7, %cl ; X86-NEXT: shlb $2, %cl -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: btcl %ecx, %eax ; X86-NEXT: retl %1 = shl i32 %n, 2 diff --git a/llvm/test/CodeGen/X86/combine-bitreverse.ll b/llvm/test/CodeGen/X86/combine-bitreverse.ll --- a/llvm/test/CodeGen/X86/combine-bitreverse.ll +++ b/llvm/test/CodeGen/X86/combine-bitreverse.ll @@ -235,10 +235,11 @@ ; X86-NEXT: andl $858993459, %eax # imm = 0x33333333 ; X86-NEXT: leal (%eax,%ecx,4), %ecx ; X86-NEXT: movl %ecx, %eax -; X86-NEXT: andl $5592405, %eax # imm = 0x555555 -; X86-NEXT: shll $6, %ecx -; X86-NEXT: andl $-1431655808, %ecx # imm = 0xAAAAAA80 -; X86-NEXT: shll $8, %eax +; X86-NEXT: shrl %eax +; X86-NEXT: andl $22369621, %eax # imm = 0x1555555 +; X86-NEXT: andl $5592405, %ecx # imm = 0x555555 +; X86-NEXT: shll $8, %ecx +; X86-NEXT: shll $7, %eax ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: bswapl %eax ; X86-NEXT: movl %eax, %ecx @@ -273,25 +274,26 @@ ; X64-NEXT: andl $858993459, %eax # imm = 0x33333333 ; X64-NEXT: shrl $2, %edi ; X64-NEXT: andl $858993459, %edi # imm = 0x33333333 -; X64-NEXT: leal (%rdi,%rax,4), %eax -; X64-NEXT: movl %eax, %ecx +; X64-NEXT: leal (%rdi,%rax,4), %ecx +; X64-NEXT: movl %ecx, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: andl $22369621, %eax # imm = 0x1555555 ; X64-NEXT: andl $5592405, %ecx # imm = 0x555555 -; X64-NEXT: shll $6, %eax -; X64-NEXT: andl $-1431655808, %eax # imm = 0xAAAAAA80 ; X64-NEXT: shll $8, %ecx -; X64-NEXT: orl %eax, %ecx -; X64-NEXT: bswapl %ecx -; X64-NEXT: movl %ecx, %eax -; X64-NEXT: andl $986895, %eax # imm = 0xF0F0F -; X64-NEXT: shll $4, %eax -; X64-NEXT: shrl $4, %ecx -; X64-NEXT: andl $135204623, %ecx # imm = 0x80F0F0F -; X64-NEXT: orl %eax, %ecx -; X64-NEXT: movl %ecx, %eax -; X64-NEXT: andl $3355443, %eax # imm = 0x333333 -; X64-NEXT: shrl $2, %ecx -; X64-NEXT: andl $36909875, %ecx # imm = 0x2333333 -; X64-NEXT: leal (%rcx,%rax,4), %eax +; X64-NEXT: shll $7, %eax +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: bswapl %eax +; X64-NEXT: movl %eax, %ecx +; X64-NEXT: andl $986895, %ecx # imm = 0xF0F0F +; X64-NEXT: shll $4, %ecx +; X64-NEXT: shrl $4, %eax +; X64-NEXT: andl $135204623, %eax # imm = 0x80F0F0F +; X64-NEXT: orl %ecx, %eax +; X64-NEXT: movl 
%eax, %ecx +; X64-NEXT: andl $3355443, %ecx # imm = 0x333333 +; X64-NEXT: shrl $2, %eax +; X64-NEXT: andl $36909875, %eax # imm = 0x2333333 +; X64-NEXT: leal (%rax,%rcx,4), %eax ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl $1431655765, %ecx # imm = 0x55555555 ; X64-NEXT: shrl %eax @@ -322,7 +324,9 @@ ; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: andl $357913941, %ecx # imm = 0x15555555 -; X86-NEXT: andl $-1431655766, %eax # imm = 0xAAAAAAAA +; X86-NEXT: shrl %eax +; X86-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; X86-NEXT: addl %eax, %eax ; X86-NEXT: leal (%eax,%ecx,4), %eax ; X86-NEXT: bswapl %eax ; X86-NEXT: movl %eax, %ecx diff --git a/llvm/test/CodeGen/X86/combine-rotates.ll b/llvm/test/CodeGen/X86/combine-rotates.ll --- a/llvm/test/CodeGen/X86/combine-rotates.ll +++ b/llvm/test/CodeGen/X86/combine-rotates.ll @@ -440,11 +440,11 @@ ; CHECK-LABEL: rotl_merge_i5: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal (,%rdi,4), %ecx -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: andb $24, %al -; CHECK-NEXT: shrb $3, %al -; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: leal (,%rdi,4), %eax +; CHECK-NEXT: shrb $3, %dil +; CHECK-NEXT: andb $3, %dil +; CHECK-NEXT: orb %dil, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %r1 = call i5 @llvm.fshl.i5(i5 %x, i5 %x, i5 -1) %r2 = call i5 @llvm.fshl.i5(i5 %r1, i5 %r1, i5 1) diff --git a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll --- a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll +++ b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll @@ -14,15 +14,15 @@ ; X86-LABEL: test_i8_7_mask_lshr_1: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $6, %al ; X86-NEXT: shrb %al +; X86-NEXT: andb $3, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_7_mask_lshr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $6, %al ; X64-NEXT: shrb %al +; X64-NEXT: andb $3, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 7 @@ -34,15 +34,15 @@ ; X86-LABEL: test_i8_28_mask_lshr_1: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $28, %al ; X86-NEXT: shrb %al +; X86-NEXT: andb $14, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_lshr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $28, %al ; X64-NEXT: shrb %al +; X64-NEXT: andb $14, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -53,15 +53,15 @@ ; X86-LABEL: test_i8_28_mask_lshr_2: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $28, %al ; X86-NEXT: shrb $2, %al +; X86-NEXT: andb $7, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_lshr_2: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $28, %al ; X64-NEXT: shrb $2, %al +; X64-NEXT: andb $7, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -72,15 +72,15 @@ ; X86-LABEL: test_i8_28_mask_lshr_3: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $24, %al ; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $3, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_lshr_3: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $24, %al ; X64-NEXT: shrb $3, %al +; X64-NEXT: andb $3, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -91,15 +91,15 @@ ; X86-LABEL: test_i8_28_mask_lshr_4: ; X86: # 
%bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $16, %al ; X86-NEXT: shrb $4, %al +; X86-NEXT: andb $1, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_lshr_4: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $16, %al ; X64-NEXT: shrb $4, %al +; X64-NEXT: andb $1, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -130,15 +130,15 @@ ; X86-LABEL: test_i8_224_mask_lshr_4: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $-32, %al ; X86-NEXT: shrb $4, %al +; X86-NEXT: andb $14, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_224_mask_lshr_4: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $-32, %al ; X64-NEXT: shrb $4, %al +; X64-NEXT: andb $14, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 224 @@ -186,15 +186,15 @@ ; X86-LABEL: test_i8_7_mask_ashr_1: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $6, %al ; X86-NEXT: shrb %al +; X86-NEXT: andb $3, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_7_mask_ashr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $6, %al ; X64-NEXT: shrb %al +; X64-NEXT: andb $3, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 7 @@ -206,15 +206,15 @@ ; X86-LABEL: test_i8_28_mask_ashr_1: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $28, %al ; X86-NEXT: shrb %al +; X86-NEXT: andb $14, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_ashr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $28, %al ; X64-NEXT: shrb %al +; X64-NEXT: andb $14, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -225,15 +225,15 @@ ; X86-LABEL: test_i8_28_mask_ashr_2: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $28, %al ; X86-NEXT: shrb $2, %al +; X86-NEXT: andb $7, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_ashr_2: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $28, %al ; X64-NEXT: shrb $2, %al +; X64-NEXT: andb $7, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -244,15 +244,15 @@ ; X86-LABEL: test_i8_28_mask_ashr_3: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $24, %al ; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $3, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_ashr_3: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $24, %al ; X64-NEXT: shrb $3, %al +; X64-NEXT: andb $3, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -263,15 +263,15 @@ ; X86-LABEL: test_i8_28_mask_ashr_4: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $16, %al ; X86-NEXT: shrb $4, %al +; X86-NEXT: andb $1, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_ashr_4: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $16, %al ; X64-NEXT: shrb $4, %al +; X64-NEXT: andb $1, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -533,17 +533,17 @@ define i16 @test_i16_127_mask_lshr_1(i16 %a0) { ; X86-LABEL: test_i16_127_mask_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $126, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl %eax +; X86-NEXT: andl $63, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_127_mask_lshr_1: ; 
X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $126, %eax ; X64-NEXT: shrl %eax +; X64-NEXT: andl $63, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 127 @@ -554,17 +554,17 @@ define i16 @test_i16_2032_mask_lshr_3(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_lshr_3: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $2032, %eax # imm = 0x7F0 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl $3, %eax +; X86-NEXT: andl $254, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_lshr_3: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $2032, %eax # imm = 0x7F0 ; X64-NEXT: shrl $3, %eax +; X64-NEXT: andl $254, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 2032 @@ -636,36 +636,38 @@ ; X86-LABEL: test_i16_65024_mask_lshr_1: ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $65024, %eax # imm = 0xFE00 ; X86-NEXT: shrl %eax +; X86-NEXT: andl $32512, %eax # imm = 0x7F00 ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_65024_mask_lshr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $65024, %eax # imm = 0xFE00 ; X64-NEXT: shrl %eax +; X64-NEXT: andl $32512, %eax # imm = 0x7F00 ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 65024 %t1 = lshr i16 %t0, 1 ret i16 %t1 } + +; Explicit `movzbl 5(%esp), %eax` for X86 because the exact value is +; necessary to optimize out the `shr`. define i16 @test_i16_65024_mask_lshr_8(i16 %a0) { ; X86-LABEL: test_i16_65024_mask_lshr_8: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $65024, %eax # imm = 0xFE00 -; X86-NEXT: shrl $8, %eax +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $-2, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_65024_mask_lshr_8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $65024, %eax # imm = 0xFE00 ; X64-NEXT: shrl $8, %eax +; X64-NEXT: andl $254, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 65024 @@ -714,17 +716,17 @@ define i16 @test_i16_127_mask_ashr_1(i16 %a0) { ; X86-LABEL: test_i16_127_mask_ashr_1: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $126, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl %eax +; X86-NEXT: andl $63, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_127_mask_ashr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $126, %eax ; X64-NEXT: shrl %eax +; X64-NEXT: andl $63, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 127 @@ -735,17 +737,17 @@ define i16 @test_i16_2032_mask_ashr_3(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_ashr_3: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $2032, %eax # imm = 0x7F0 +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl $3, %eax +; X86-NEXT: andl $254, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_ashr_3: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $2032, %eax # imm = 0x7F0 ; X64-NEXT: shrl $3, %eax +; X64-NEXT: andl $254, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 2032 @@ -897,17 +899,16 @@ define i16 
@test_i16_127_mask_shl_1(i16 %a0) { ; X86-LABEL: test_i16_127_mask_shl_1: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $127, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: addl %eax, %eax +; X86-NEXT: movzbl %al, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_127_mask_shl_1: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: andl $127, %edi -; X64-NEXT: leal (%rdi,%rdi), %eax +; X64-NEXT: addl %edi, %edi +; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 127 @@ -1082,16 +1083,16 @@ define i32 @test_i32_32767_mask_lshr_1(i32 %a0) { ; X86-LABEL: test_i32_32767_mask_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movl $32766, %eax # imm = 0x7FFE -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl %eax +; X86-NEXT: andl $16383, %eax # imm = 0x3FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_32767_mask_lshr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $32766, %eax # imm = 0x7FFE ; X64-NEXT: shrl %eax +; X64-NEXT: andl $16383, %eax # imm = 0x3FFF ; X64-NEXT: retq %t0 = and i32 %a0, 32767 %t1 = lshr i32 %t0, 1 @@ -1101,16 +1102,16 @@ define i32 @test_i32_8388352_mask_lshr_7(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_lshr_7: ; X86: # %bb.0: -; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl $7, %eax +; X86-NEXT: andl $65534, %eax # imm = 0xFFFE ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_lshr_7: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8388352, %eax # imm = 0x7FFF00 ; X64-NEXT: shrl $7, %eax +; X64-NEXT: andl $65534, %eax # imm = 0xFFFE ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = lshr i32 %t0, 7 @@ -1119,16 +1120,16 @@ define i32 @test_i32_8388352_mask_lshr_8(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_lshr_8: ; X86: # %bb.0: -; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl $8, %eax +; X86-NEXT: andl $32767, %eax # imm = 0x7FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_lshr_8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8388352, %eax # imm = 0x7FFF00 ; X64-NEXT: shrl $8, %eax +; X64-NEXT: andl $32767, %eax # imm = 0x7FFF ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = lshr i32 %t0, 8 @@ -1137,16 +1138,16 @@ define i32 @test_i32_8388352_mask_lshr_9(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_lshr_9: ; X86: # %bb.0: -; X86-NEXT: movl $8388096, %eax # imm = 0x7FFE00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl $9, %eax +; X86-NEXT: andl $16383, %eax # imm = 0x3FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_lshr_9: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8388096, %eax # imm = 0x7FFE00 ; X64-NEXT: shrl $9, %eax +; X64-NEXT: andl $16383, %eax # imm = 0x3FFF ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = lshr i32 %t0, 9 @@ -1155,16 +1156,16 @@ define i32 @test_i32_8388352_mask_lshr_10(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_lshr_10: ; X86: # %bb.0: -; X86-NEXT: movl $8387584, %eax # imm = 0x7FFC00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl $10, %eax +; X86-NEXT: andl $8191, %eax # imm = 0x1FFF ; X86-NEXT: retl ; ; X64-LABEL: 
test_i32_8388352_mask_lshr_10: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8387584, %eax # imm = 0x7FFC00 ; X64-NEXT: shrl $10, %eax +; X64-NEXT: andl $8191, %eax # imm = 0x1FFF ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = lshr i32 %t0, 10 @@ -1189,19 +1190,22 @@ %t1 = lshr i32 %t0, 1 ret i32 %t1 } + + +; Explicit `movzwl 6(%esp), %eax` for X86 because the exact value is +; necessary to optimize out the `shr`. define i32 @test_i32_4294836224_mask_lshr_16(i32 %a0) { ; X86-LABEL: test_i32_4294836224_mask_lshr_16: ; X86: # %bb.0: -; X86-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl $16, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $-2, %eax ; X86-NEXT: retl ; ; X64-LABEL: test_i32_4294836224_mask_lshr_16: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $-131072, %eax # imm = 0xFFFE0000 ; X64-NEXT: shrl $16, %eax +; X64-NEXT: andl $-2, %eax ; X64-NEXT: retq %t0 = and i32 %a0, 4294836224 %t1 = lshr i32 %t0, 16 @@ -1245,16 +1249,16 @@ define i32 @test_i32_32767_mask_ashr_1(i32 %a0) { ; X86-LABEL: test_i32_32767_mask_ashr_1: ; X86: # %bb.0: -; X86-NEXT: movl $32766, %eax # imm = 0x7FFE -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl %eax +; X86-NEXT: andl $16383, %eax # imm = 0x3FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_32767_mask_ashr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $32766, %eax # imm = 0x7FFE ; X64-NEXT: shrl %eax +; X64-NEXT: andl $16383, %eax # imm = 0x3FFF ; X64-NEXT: retq %t0 = and i32 %a0, 32767 %t1 = ashr i32 %t0, 1 @@ -1264,16 +1268,16 @@ define i32 @test_i32_8388352_mask_ashr_7(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_ashr_7: ; X86: # %bb.0: -; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl $7, %eax +; X86-NEXT: andl $65534, %eax # imm = 0xFFFE ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_ashr_7: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8388352, %eax # imm = 0x7FFF00 ; X64-NEXT: shrl $7, %eax +; X64-NEXT: andl $65534, %eax # imm = 0xFFFE ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = ashr i32 %t0, 7 @@ -1282,16 +1286,16 @@ define i32 @test_i32_8388352_mask_ashr_8(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_ashr_8: ; X86: # %bb.0: -; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl $8, %eax +; X86-NEXT: andl $32767, %eax # imm = 0x7FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_ashr_8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8388352, %eax # imm = 0x7FFF00 ; X64-NEXT: shrl $8, %eax +; X64-NEXT: andl $32767, %eax # imm = 0x7FFF ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = ashr i32 %t0, 8 @@ -1300,16 +1304,16 @@ define i32 @test_i32_8388352_mask_ashr_9(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_ashr_9: ; X86: # %bb.0: -; X86-NEXT: movl $8388096, %eax # imm = 0x7FFE00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shrl $9, %eax +; X86-NEXT: andl $16383, %eax # imm = 0x3FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_ashr_9: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8388096, %eax # imm = 0x7FFE00 ; X64-NEXT: shrl $9, %eax +; X64-NEXT: andl $16383, %eax # imm = 0x3FFF ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = ashr i32 %t0, 9 @@ 
-1318,16 +1322,16 @@
 define i32 @test_i32_8388352_mask_ashr_10(i32 %a0) {
 ; X86-LABEL: test_i32_8388352_mask_ashr_10:
 ; X86: # %bb.0:
-; X86-NEXT: movl $8387584, %eax # imm = 0x7FFC00
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: shrl $10, %eax
+; X86-NEXT: andl $8191, %eax # imm = 0x1FFF
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_i32_8388352_mask_ashr_10:
 ; X64: # %bb.0:
 ; X64-NEXT: movl %edi, %eax
-; X64-NEXT: andl $8387584, %eax # imm = 0x7FFC00
 ; X64-NEXT: shrl $10, %eax
+; X64-NEXT: andl $8191, %eax # imm = 0x1FFF
 ; X64-NEXT: retq
 %t0 = and i32 %a0, 8388352
 %t1 = ashr i32 %t0, 10
@@ -1408,16 +1412,15 @@
 define i32 @test_i32_32767_mask_shl_1(i32 %a0) {
 ; X86-LABEL: test_i32_32767_mask_shl_1:
 ; X86: # %bb.0:
-; X86-NEXT: movl $32767, %eax # imm = 0x7FFF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: addl %eax, %eax
+; X86-NEXT: movzwl %ax, %eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_i32_32767_mask_shl_1:
 ; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: andl $32767, %edi # imm = 0x7FFF
-; X64-NEXT: leal (%rdi,%rdi), %eax
+; X64-NEXT: addl %edi, %edi
+; X64-NEXT: movzwl %di, %eax
 ; X64-NEXT: retq
 %t0 = and i32 %a0, 32767
 %t1 = shl i32 %t0, 1
@@ -1575,17 +1578,17 @@
 define i64 @test_i64_2147483647_mask_lshr_1(i64 %a0) {
 ; X86-LABEL: test_i64_2147483647_mask_lshr_1:
 ; X86: # %bb.0:
-; X86-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: shrl %eax
+; X86-NEXT: andl $1073741823, %eax # imm = 0x3FFFFFFF
 ; X86-NEXT: xorl %edx, %edx
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_i64_2147483647_mask_lshr_1:
 ; X64: # %bb.0:
 ; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: andl $2147483646, %eax # imm = 0x7FFFFFFE
-; X64-NEXT: shrq %rax
+; X64-NEXT: shrl %eax
+; X64-NEXT: andl $1073741823, %eax # imm = 0x3FFFFFFF
 ; X64-NEXT: retq
 %t0 = and i64 %a0, 2147483647
 %t1 = lshr i64 %t0, 1
@@ -1604,9 +1607,9 @@
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_lshr_15:
 ; X64: # %bb.0:
-; X64-NEXT: movabsq $140737488289792, %rax # imm = 0x7FFFFFFF0000
-; X64-NEXT: andq %rdi, %rax
+; X64-NEXT: movq %rdi, %rax
 ; X64-NEXT: shrq $15, %rax
+; X64-NEXT: andl $-2, %eax
 ; X64-NEXT: retq
 %t0 = and i64 %a0, 140737488289792
 %t1 = lshr i64 %t0, 15
@@ -1701,9 +1704,9 @@
 ;
 ; X64-LABEL: test_i64_18446744065119617024_mask_lshr_32:
 ; X64: # %bb.0:
-; X64-NEXT: movabsq $-8589934592, %rax # imm = 0xFFFFFFFE00000000
-; X64-NEXT: andq %rdi, %rax
+; X64-NEXT: movq %rdi, %rax
 ; X64-NEXT: shrq $32, %rax
+; X64-NEXT: andl $-2, %eax
 ; X64-NEXT: retq
 %t0 = and i64 %a0, 18446744065119617024
 %t1 = lshr i64 %t0, 32
@@ -1749,17 +1752,17 @@
 define i64 @test_i64_2147483647_mask_ashr_1(i64 %a0) {
 ; X86-LABEL: test_i64_2147483647_mask_ashr_1:
 ; X86: # %bb.0:
-; X86-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: shrl %eax
+; X86-NEXT: andl $1073741823, %eax # imm = 0x3FFFFFFF
 ; X86-NEXT: xorl %edx, %edx
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: test_i64_2147483647_mask_ashr_1:
 ; X64: # %bb.0:
 ; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: andl $2147483646, %eax # imm = 0x7FFFFFFE
-; X64-NEXT: shrq %rax
+; X64-NEXT: shrl %eax
+; X64-NEXT: andl $1073741823, %eax # imm = 0x3FFFFFFF
 ; X64-NEXT: retq
 %t0 = and i64 %a0, 2147483647
 %t1 = ashr i64 %t0, 1
@@ -1778,9 +1781,9 @@
 ;
 ; X64-LABEL: test_i64_140737488289792_mask_ashr_15:
 ; X64: # %bb.0:
-; X64-NEXT: movabsq $140737488289792, %rax # imm = 0x7FFFFFFF0000
-; X64-NEXT: andq %rdi, %rax
+; X64-NEXT: movq %rdi, %rax
 ; X64-NEXT: shrq $15, %rax
+; X64-NEXT: andl $-2, %eax
 ; X64-NEXT: retq
 %t0 = and i64 %a0, 140737488289792
 %t1 = ashr i64 %t0, 15
@@ -1933,8 +1936,7 @@
 ;
 ; X64-LABEL: test_i64_2147483647_mask_shl_1:
 ; X64: # %bb.0:
-; X64-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF
-; X64-NEXT: leaq (%rdi,%rdi), %rax
+; X64-NEXT: leal (%rdi,%rdi), %eax
 ; X64-NEXT: retq
 %t0 = and i64 %a0, 2147483647
 %t1 = shl i64 %t0, 1
diff --git a/llvm/test/CodeGen/X86/const-shift-with-and.ll b/llvm/test/CodeGen/X86/const-shift-with-and.ll
--- a/llvm/test/CodeGen/X86/const-shift-with-and.ll
+++ b/llvm/test/CodeGen/X86/const-shift-with-and.ll
@@ -97,16 +97,14 @@
 define i32 @shr_and_to_mask_i32(i32 %x) nounwind {
 ; X86-LABEL: shr_and_to_mask_i32:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: shrl $6, %eax
-; X86-NEXT: andl $1023, %eax # imm = 0x3FF
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: shr_and_to_mask_i32:
 ; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
+; X64-NEXT: movzwl %di, %eax
 ; X64-NEXT: shrl $6, %eax
-; X64-NEXT: andl $1023, %eax # imm = 0x3FF
 ; X64-NEXT: retq
 %shr = lshr i32 %x, 6
 %and = and i32 %shr, 1023
@@ -116,17 +114,16 @@
 define i64 @and_shl_to_mask_i64(i64 %x) nounwind {
 ; X86-LABEL: and_shl_to_mask_i64:
 ; X86: # %bb.0:
-; X86-NEXT: movl $511, %eax # imm = 0x1FF
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: shll $7, %eax
+; X86-NEXT: movzwl %ax, %eax
 ; X86-NEXT: xorl %edx, %edx
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: and_shl_to_mask_i64:
 ; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: andl $511, %eax # imm = 0x1FF
-; X64-NEXT: shlq $7, %rax
+; X64-NEXT: shll $7, %edi
+; X64-NEXT: movzwl %di, %eax
 ; X64-NEXT: retq
 %and = and i64 %x, 511
 %shl = shl i64 %and, 7
@@ -195,17 +192,17 @@
 define i64 @and_shr_to_shrink_i64(i64 %x) nounwind {
 ; X86-LABEL: and_shr_to_shrink_i64:
 ; X86: # %bb.0:
-; X86-NEXT: movl $64704, %eax # imm = 0xFCC0
-; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: shrl $6, %eax
+; X86-NEXT: andl $1011, %eax # imm = 0x3F3
 ; X86-NEXT: xorl %edx, %edx
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: and_shr_to_shrink_i64:
 ; X64: # %bb.0:
 ; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: andl $64704, %eax # imm = 0xFCC0
 ; X64-NEXT: shrq $6, %rax
+; X64-NEXT: andl $1011, %eax # imm = 0x3F3
 ; X64-NEXT: retq
 %and = and i64 %x, 64704
 %shr = lshr i64 %and, 6
@@ -215,16 +212,16 @@
 define i32 @shl_and_to_shrink_i32(i32 %x) nounwind {
 ; X86-LABEL: shl_and_to_shrink_i32:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl $511, %eax # imm = 0x1FF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: shll $8, %eax
-; X86-NEXT: andl $130816, %eax # imm = 0x1FF00
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: shl_and_to_shrink_i32:
 ; X64: # %bb.0:
 ; X64-NEXT: movl %edi, %eax
+; X64-NEXT: andl $511, %eax # imm = 0x1FF
 ; X64-NEXT: shll $8, %eax
-; X64-NEXT: andl $130816, %eax # imm = 0x1FF00
 ; X64-NEXT: retq
 %shl = shl i32 %x, 8
 %and = and i32 %shl, 131071
@@ -312,9 +309,9 @@
 define i64 @shr_and_from_shrink32_i64(i64 %x) nounwind {
 ; X86-LABEL: shr_and_from_shrink32_i64:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl $8192, %eax # imm = 0x2000
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: shll $16, %eax
-; X86-NEXT: andl $536870912, %eax # imm = 0x20000000
 ; X86-NEXT: xorl %edx, %edx
 ; X86-NEXT: retl
 ;
@@ -376,8 +373,8 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl %eax, %edx
 ; X86-NEXT: shrl $27, %edx
+; X86-NEXT: andl $89501588, %eax # imm = 0x555AF94
 ; X86-NEXT: shll $5, %eax
-; X86-NEXT: andl $-1430916480, %eax # imm = 0xAAB5F280
 ; X86-NEXT: andl $-4, %edx
 ; X86-NEXT: retl
 ;
diff --git a/llvm/test/CodeGen/X86/fold-and-shift.ll b/llvm/test/CodeGen/X86/fold-and-shift.ll
--- a/llvm/test/CodeGen/X86/fold-and-shift.ll
+++ b/llvm/test/CodeGen/X86/fold-and-shift.ll
@@ -4,10 +4,9 @@
 define i32 @t1(ptr %X, i32 %i) {
 ; CHECK-LABEL: t1:
 ; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
+; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movzbl %cl, %ecx
-; CHECK-NEXT: movl (%eax,%ecx,4), %eax
+; CHECK-NEXT: movl (%ecx,%eax,4), %eax
 ; CHECK-NEXT: retl
 entry:
@@ -22,9 +21,9 @@
 ; CHECK-LABEL: t2:
 ; CHECK: # %bb.0: # %entry
 ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
-; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; CHECK-NEXT: movzwl %cx, %ecx
-; CHECK-NEXT: movl (%eax,%ecx,4), %eax
+; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; CHECK-NEXT: addl %ecx, %ecx
+; CHECK-NEXT: movl (%eax,%ecx,2), %eax
 ; CHECK-NEXT: retl
 entry:
diff --git a/llvm/test/CodeGen/X86/limited-prec.ll b/llvm/test/CodeGen/X86/limited-prec.ll
--- a/llvm/test/CodeGen/X86/limited-prec.ll
+++ b/llvm/test/CodeGen/X86/limited-prec.ll
@@ -318,8 +318,8 @@
 ; precision6-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF
 ; precision6-NEXT: orl $1065353216, %ecx # imm = 0x3F800000
 ; precision6-NEXT: movl %ecx, (%esp)
-; precision6-NEXT: andl $2139095040, %eax # imm = 0x7F800000
 ; precision6-NEXT: shrl $23, %eax
+; precision6-NEXT: movzbl %al, %eax
 ; precision6-NEXT: addl $-127, %eax
 ; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; precision6-NEXT: flds (%esp)
@@ -342,8 +342,8 @@
 ; precision12-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF
 ; precision12-NEXT: orl $1065353216, %ecx # imm = 0x3F800000
 ; precision12-NEXT: movl %ecx, (%esp)
-; precision12-NEXT: andl $2139095040, %eax # imm = 0x7F800000
 ; precision12-NEXT: shrl $23, %eax
+; precision12-NEXT: movzbl %al, %eax
 ; precision12-NEXT: addl $-127, %eax
 ; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; precision12-NEXT: flds (%esp)
@@ -370,8 +370,8 @@
 ; precision18-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF
 ; precision18-NEXT: orl $1065353216, %ecx # imm = 0x3F800000
 ; precision18-NEXT: movl %ecx, (%esp)
-; precision18-NEXT: andl $2139095040, %eax # imm = 0x7F800000
 ; precision18-NEXT: shrl $23, %eax
+; precision18-NEXT: movzbl %al, %eax
 ; precision18-NEXT: addl $-127, %eax
 ; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; precision18-NEXT: flds (%esp)
@@ -410,8 +410,8 @@
 ; precision6-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF
 ; precision6-NEXT: orl $1065353216, %ecx # imm = 0x3F800000
 ; precision6-NEXT: movl %ecx, (%esp)
-; precision6-NEXT: andl $2139095040, %eax # imm = 0x7F800000
 ; precision6-NEXT: shrl $23, %eax
+; precision6-NEXT: movzbl %al, %eax
 ; precision6-NEXT: addl $-127, %eax
 ; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; precision6-NEXT: flds (%esp)
@@ -432,8 +432,8 @@
 ; precision12-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF
 ; precision12-NEXT: orl $1065353216, %ecx # imm = 0x3F800000
 ; precision12-NEXT: movl %ecx, (%esp)
-; precision12-NEXT: andl $2139095040, %eax # imm = 0x7F800000
 ; precision12-NEXT: shrl $23, %eax
+; precision12-NEXT: movzbl %al, %eax
 ; precision12-NEXT: addl $-127, %eax
 ; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; precision12-NEXT: flds (%esp)
@@ -458,8 +458,8 @@
 ; precision18-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF
 ; precision18-NEXT: orl $1065353216, %ecx # imm = 0x3F800000
 ; precision18-NEXT: movl %ecx, (%esp)
-; precision18-NEXT: andl $2139095040, %eax # imm = 0x7F800000
 ; precision18-NEXT: shrl $23, %eax
+; precision18-NEXT: movzbl %al, %eax
 ; precision18-NEXT: addl $-127, %eax
 ; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; precision18-NEXT: flds (%esp)
@@ -496,8 +496,8 @@
 ; precision6-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF
 ; precision6-NEXT: orl $1065353216, %ecx # imm = 0x3F800000
 ; precision6-NEXT: movl %ecx, (%esp)
-; precision6-NEXT: andl $2139095040, %eax # imm = 0x7F800000
 ; precision6-NEXT: shrl $23, %eax
+; precision6-NEXT: movzbl %al, %eax
 ; precision6-NEXT: addl $-127, %eax
 ; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; precision6-NEXT: flds (%esp)
@@ -520,8 +520,8 @@
 ; precision12-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF
 ; precision12-NEXT: orl $1065353216, %ecx # imm = 0x3F800000
 ; precision12-NEXT: movl %ecx, (%esp)
-; precision12-NEXT: andl $2139095040, %eax # imm = 0x7F800000
 ; precision12-NEXT: shrl $23, %eax
+; precision12-NEXT: movzbl %al, %eax
 ; precision12-NEXT: addl $-127, %eax
 ; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; precision12-NEXT: flds (%esp)
@@ -546,8 +546,8 @@
 ; precision18-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF
 ; precision18-NEXT: orl $1065353216, %ecx # imm = 0x3F800000
 ; precision18-NEXT: movl %ecx, (%esp)
-; precision18-NEXT: andl $2139095040, %eax # imm = 0x7F800000
 ; precision18-NEXT: shrl $23, %eax
+; precision18-NEXT: movzbl %al, %eax
 ; precision18-NEXT: addl $-127, %eax
 ; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp)
 ; precision18-NEXT: flds (%esp)
diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll
--- a/llvm/test/CodeGen/X86/movmsk-cmp.ll
+++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll
@@ -3853,8 +3853,6 @@
 ; SSE-NEXT: shrl $15, %ecx
 ; SSE-NEXT: movl %eax, %edx
 ; SSE-NEXT: shrl $8, %edx
-; SSE-NEXT: andl $1, %edx
-; SSE-NEXT: andl $8, %eax
 ; SSE-NEXT: shrl $3, %eax
 ; SSE-NEXT: xorl %edx, %eax
 ; SSE-NEXT: andl %ecx, %eax
@@ -3869,8 +3867,6 @@
 ; AVX1OR2-NEXT: shrl $15, %ecx
 ; AVX1OR2-NEXT: movl %eax, %edx
 ; AVX1OR2-NEXT: shrl $8, %edx
-; AVX1OR2-NEXT: andl $1, %edx
-; AVX1OR2-NEXT: andl $8, %eax
 ; AVX1OR2-NEXT: shrl $3, %eax
 ; AVX1OR2-NEXT: xorl %edx, %eax
 ; AVX1OR2-NEXT: andl %ecx, %eax
@@ -3975,8 +3971,8 @@
 ; SSE-NEXT: movmskps %xmm1, %eax
 ; SSE-NEXT: movl %eax, %ecx
 ; SSE-NEXT: shrb $3, %cl
-; SSE-NEXT: andb $4, %al
 ; SSE-NEXT: shrb $2, %al
+; SSE-NEXT: andb $1, %al
 ; SSE-NEXT: xorb %cl, %al
 ; SSE-NEXT: # kill: def $al killed $al killed $eax
 ; SSE-NEXT: retq
@@ -3987,8 +3983,8 @@
 ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax
 ; AVX1OR2-NEXT: movl %eax, %ecx
 ; AVX1OR2-NEXT: shrb $3, %cl
-; AVX1OR2-NEXT: andb $4, %al
 ; AVX1OR2-NEXT: shrb $2, %al
+; AVX1OR2-NEXT: andb $1, %al
 ; AVX1OR2-NEXT: xorb %cl, %al
 ; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax
 ; AVX1OR2-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/pr15267.ll b/llvm/test/CodeGen/X86/pr15267.ll
--- a/llvm/test/CodeGen/X86/pr15267.ll
+++ b/llvm/test/CodeGen/X86/pr15267.ll
@@ -85,19 +85,17 @@
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq (%rdi), %rax
 ; CHECK-NEXT: movl %eax, %ecx
-; CHECK-NEXT: shrl $4, %ecx
 ; CHECK-NEXT: andl $15, %ecx
-; CHECK-NEXT: movl %eax, %edx
-; CHECK-NEXT: andl $15, %edx
-; CHECK-NEXT: vmovd %edx, %xmm0
+; CHECK-NEXT: vmovd %ecx, %xmm0
+; CHECK-NEXT: movzbl %al, %ecx
+; CHECK-NEXT: shrl $4, %ecx
 ; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0
 ; CHECK-NEXT: movl %eax, %ecx
 ; CHECK-NEXT: shrl $8, %ecx
 ; CHECK-NEXT: andl $15, %ecx
 ; CHECK-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0
-; CHECK-NEXT: movl %eax, %ecx
+; CHECK-NEXT: movzwl %ax, %ecx
 ; CHECK-NEXT: shrl $12, %ecx
-; CHECK-NEXT: andl $15, %ecx
 ; CHECK-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0
 ; CHECK-NEXT: movl %eax, %ecx
 ; CHECK-NEXT: shrl $16, %ecx
diff --git a/llvm/test/CodeGen/X86/pr26350.ll b/llvm/test/CodeGen/X86/pr26350.ll
--- a/llvm/test/CodeGen/X86/pr26350.ll
+++ b/llvm/test/CodeGen/X86/pr26350.ll
@@ -11,8 +11,8 @@
 ; CHECK-NEXT: movl d, %eax
 ; CHECK-NEXT: movl %eax, %ecx
 ; CHECK-NEXT: shrl $31, %ecx
+; CHECK-NEXT: andl $8, %eax
 ; CHECK-NEXT: addl %eax, %eax
-; CHECK-NEXT: andl $16, %eax
 ; CHECK-NEXT: cmpl $-1, %eax
 ; CHECK-NEXT: sbbl $0, %ecx
 ; CHECK-NEXT: setb %al
diff --git a/llvm/test/CodeGen/X86/pr32282.ll b/llvm/test/CodeGen/X86/pr32282.ll
--- a/llvm/test/CodeGen/X86/pr32282.ll
+++ b/llvm/test/CodeGen/X86/pr32282.ll
@@ -36,10 +36,11 @@
 ; X64-LABEL: foo:
 ; X64: # %bb.0:
 ; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: movq d(%rip), %rcx
-; X64-NEXT: movabsq $3013716102212485120, %rdx # imm = 0x29D2DED3DE400000
-; X64-NEXT: andnq %rdx, %rcx, %rcx
-; X64-NEXT: shrq $21, %rcx
+; X64-NEXT: movq d(%rip), %rdx
+; X64-NEXT: notq %rdx
+; X64-NEXT: shrq $21, %rdx
+; X64-NEXT: movabsq $1437051821810, %rcx # imm = 0x14E96F69EF2
+; X64-NEXT: andq %rdx, %rcx
 ; X64-NEXT: addq $7, %rcx
 ; X64-NEXT: movq %rdi, %rdx
 ; X64-NEXT: orq %rcx, %rdx
diff --git a/llvm/test/CodeGen/X86/pr45995.ll b/llvm/test/CodeGen/X86/pr45995.ll
--- a/llvm/test/CodeGen/X86/pr45995.ll
+++ b/llvm/test/CodeGen/X86/pr45995.ll
@@ -18,11 +18,11 @@
 ; CHECK-NEXT: mov ebx, edi
 ; CHECK-NEXT: shr bl, 3
 ; CHECK-NEXT: mov ebp, edi
-; CHECK-NEXT: and bpl, 4
 ; CHECK-NEXT: shr bpl, 2
+; CHECK-NEXT: and bpl, 1
 ; CHECK-NEXT: mov r14d, edi
-; CHECK-NEXT: and r14b, 2
 ; CHECK-NEXT: shr r14b
+; CHECK-NEXT: and r14b, 1
 ; CHECK-NEXT: call print_i1@PLT
 ; CHECK-NEXT: movzx edi, r14b
 ; CHECK-NEXT: call print_i1@PLT
@@ -78,21 +78,21 @@
 ; CHECK-NEXT: shr al, 3
 ; CHECK-NEXT: mov byte ptr [rsp + 7], al # 1-byte Spill
 ; CHECK-NEXT: mov r14d, ebx
-; CHECK-NEXT: and r14b, 4
 ; CHECK-NEXT: shr r14b, 2
+; CHECK-NEXT: and r14b, 1
 ; CHECK-NEXT: mov r15d, ebx
-; CHECK-NEXT: and r15b, 2
 ; CHECK-NEXT: shr r15b
+; CHECK-NEXT: and r15b, 1
 ; CHECK-NEXT: vpslld xmm0, xmm0, 31
 ; CHECK-NEXT: vmovmskps edi, xmm0
 ; CHECK-NEXT: mov r12d, edi
 ; CHECK-NEXT: shr r12b, 3
 ; CHECK-NEXT: mov r13d, edi
-; CHECK-NEXT: and r13b, 4
 ; CHECK-NEXT: shr r13b, 2
+; CHECK-NEXT: and r13b, 1
 ; CHECK-NEXT: mov ebp, edi
-; CHECK-NEXT: and bpl, 2
 ; CHECK-NEXT: shr bpl
+; CHECK-NEXT: and bpl, 1
 ; CHECK-NEXT: call print_i1@PLT
 ; CHECK-NEXT: movzx edi, bpl
 ; CHECK-NEXT: call print_i1@PLT
diff --git a/llvm/test/CodeGen/X86/pull-binop-through-shift.ll b/llvm/test/CodeGen/X86/pull-binop-through-shift.ll
--- a/llvm/test/CodeGen/X86/pull-binop-through-shift.ll
+++ b/llvm/test/CodeGen/X86/pull-binop-through-shift.ll
@@ -217,9 +217,9 @@
 ; X86-LABEL: and_nosignbit_lshr:
 ; X86: # %bb.0:
 ; X86-NEXT: movl 8(%esp), %ecx
-; X86-NEXT: movl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT: andl 4(%esp), %eax
+; X86-NEXT: movl 4(%esp), %eax
 ; X86-NEXT: shrl $8, %eax
+; X86-NEXT: andl $8388352, %eax # imm = 0x7FFF00
 ; X86-NEXT: movl %eax, (%ecx)
 ; X86-NEXT: retl
 %t0 = and i32 %x, 2147418112 ; 0x7FFF0000
@@ -398,9 +398,9 @@
 ; X86-LABEL: and_nosignbit_ashr:
 ; X86: # %bb.0:
 ; X86-NEXT: movl 8(%esp), %ecx
-; X86-NEXT: movl $2147418112, %eax # imm = 0x7FFF0000
-; X86-NEXT: andl 4(%esp), %eax
+; X86-NEXT: movl 4(%esp), %eax
 ; X86-NEXT: shrl $8, %eax
+; X86-NEXT: andl $8388352, %eax # imm = 0x7FFF00
 ; X86-NEXT: movl %eax, (%ecx)
 ; X86-NEXT: retl
 %t0 = and i32 %x, 2147418112 ; 0x7FFF0000
diff --git a/llvm/test/CodeGen/X86/rev16.ll b/llvm/test/CodeGen/X86/rev16.ll
--- a/llvm/test/CodeGen/X86/rev16.ll
+++ b/llvm/test/CodeGen/X86/rev16.ll
@@ -29,22 +29,22 @@
 define i32 @not_rev16(i32 %a) {
 ; X86-LABEL: not_rev16:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl %ecx, %eax
-; X86-NEXT: shll $8, %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: shrl $8, %ecx
 ; X86-NEXT: andl $65280, %ecx # imm = 0xFF00
-; X86-NEXT: andl $16711680, %eax # imm = 0xFF0000
+; X86-NEXT: andl $65280, %eax # imm = 0xFF00
+; X86-NEXT: shll $8, %eax
 ; X86-NEXT: orl %ecx, %eax
 ; X86-NEXT: retl
 ;
 ; X64-LABEL: not_rev16:
 ; X64: # %bb.0:
 ; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shll $8, %eax
-; X64-NEXT: shrl $8, %edi
+; X64-NEXT: shrl $8, %eax
+; X64-NEXT: andl $65280, %eax # imm = 0xFF00
 ; X64-NEXT: andl $65280, %edi # imm = 0xFF00
-; X64-NEXT: andl $16711680, %eax # imm = 0xFF0000
+; X64-NEXT: shll $8, %edi
 ; X64-NEXT: orl %edi, %eax
 ; X64-NEXT: retq
 %l8 = shl i32 %a, 8
@@ -116,11 +116,11 @@
 define i32 @different_shift_amount(i32 %a) {
 ; X86-LABEL: different_shift_amount:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %ecx
-; X86-NEXT: shll $9, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %ecx, %eax
 ; X86-NEXT: shrl $8, %eax
-; X86-NEXT: andl $-16712192, %ecx # imm = 0xFF00FE00
+; X86-NEXT: andl $8355967, %ecx # imm = 0x7F807F
+; X86-NEXT: shll $9, %ecx
 ; X86-NEXT: andl $16711935, %eax # imm = 0xFF00FF
 ; X86-NEXT: orl %ecx, %eax
 ; X86-NEXT: retl
@@ -128,10 +128,10 @@
 ; X64-LABEL: different_shift_amount:
 ; X64: # %bb.0:
 ; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shll $9, %eax
-; X64-NEXT: shrl $8, %edi
-; X64-NEXT: andl $-16712192, %eax # imm = 0xFF00FE00
-; X64-NEXT: andl $16711935, %edi # imm = 0xFF00FF
+; X64-NEXT: shrl $8, %eax
+; X64-NEXT: andl $8355967, %edi # imm = 0x7F807F
+; X64-NEXT: shll $9, %edi
+; X64-NEXT: andl $16711935, %eax # imm = 0xFF00FF
 ; X64-NEXT: orl %edi, %eax
 ; X64-NEXT: retq
 %l8 = shl i32 %a, 9
diff --git a/llvm/test/CodeGen/X86/rotate-extract.ll b/llvm/test/CodeGen/X86/rotate-extract.ll
--- a/llvm/test/CodeGen/X86/rotate-extract.ll
+++ b/llvm/test/CodeGen/X86/rotate-extract.ll
@@ -166,7 +166,7 @@
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: shrl $9, %ecx
-; X86-NEXT: andl $-8, %eax
+; X86-NEXT: andl $120, %eax
 ; X86-NEXT: shll $25, %eax
 ; X86-NEXT: orl %ecx, %eax
 ; X86-NEXT: retl
 ;
@@ -175,7 +175,7 @@
 ; X64: # %bb.0:
 ; X64-NEXT: movl %edi, %eax
 ; X64-NEXT: shrl $9, %eax
-; X64-NEXT: andl $-8, %edi
+; X64-NEXT: andl $120, %edi
 ; X64-NEXT: shll $25, %edi
 ; X64-NEXT: orl %edi, %eax
 ; X64-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll b/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll
--- a/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll
+++ b/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll
@@ -169,9 +169,8 @@
 define i16 @sel_shift_bool_i16(i1 %t) {
 ; ANY-LABEL: sel_shift_bool_i16:
 ; ANY: # %bb.0:
-; ANY-NEXT: movl %edi, %eax
-; ANY-NEXT: andl $1, %eax
-; ANY-NEXT: shll $7, %eax
+; ANY-NEXT: shll $7, %edi
+; ANY-NEXT: movzbl %dil, %eax
 ; ANY-NEXT: # kill: def $ax killed $ax killed $eax
 ; ANY-NEXT: retq
 %shl = select i1 %t, i16 128, i16 0
diff --git a/llvm/test/CodeGen/X86/setcc.ll b/llvm/test/CodeGen/X86/setcc.ll
--- a/llvm/test/CodeGen/X86/setcc.ll
+++ b/llvm/test/CodeGen/X86/setcc.ll
@@ -281,9 +281,8 @@
 ; X86-LABEL: shift_and:
 ; X86: ## %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: andb $4, %al
-; X86-NEXT: shrb $2, %al
-; X86-NEXT: movzbl %al, %eax
+; X86-NEXT: shrl $2, %eax
+; X86-NEXT: andl $1, %eax
 ; X86-NEXT: ## kill: def $ax killed $ax killed $eax
 ; X86-NEXT: retl
 ;
diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll
--- a/llvm/test/CodeGen/X86/shift-amount-mod.ll
+++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll
@@ -1556,9 +1556,8 @@
 define i16 @sh_trunc_sh(i64 %x) {
 ; X32-LABEL: sh_trunc_sh:
 ; X32: # %bb.0:
-; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
 ; X32-NEXT: shrl $4, %eax
-; X32-NEXT: andl $15, %eax
 ; X32-NEXT: # kill: def $ax killed $ax killed $eax
 ; X32-NEXT: retl
 ;
diff --git a/llvm/test/CodeGen/X86/shift-mask.ll b/llvm/test/CodeGen/X86/shift-mask.ll
--- a/llvm/test/CodeGen/X86/shift-mask.ll
+++ b/llvm/test/CodeGen/X86/shift-mask.ll
@@ -113,17 +113,17 @@
 define i16 @test_i16_shl_lshr_1(i16 %a0) {
 ; X86-LABEL: test_i16_shl_lshr_1:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $16376, %eax # imm = 0x3FF8
 ; X86-NEXT: shll $2, %eax
-; X86-NEXT: andl $65504, %eax # imm = 0xFFE0
 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
 ; X86-NEXT: retl
 ;
 ; X64-MASK-LABEL: test_i16_shl_lshr_1:
 ; X64-MASK: # %bb.0:
 ; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT: andl $16376, %edi # imm = 0x3FF8
 ; X64-MASK-NEXT: leal (,%rdi,4), %eax
-; X64-MASK-NEXT: andl $65504, %eax # imm = 0xFFE0
 ; X64-MASK-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-MASK-NEXT: retq
 ;
@@ -365,15 +365,15 @@
 ; X86-LABEL: test_i8_lshr_lshr_2:
 ; X86: # %bb.0:
 ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andb $7, %al
 ; X86-NEXT: shlb $2, %al
-; X86-NEXT: andb $28, %al
 ; X86-NEXT: retl
 ;
 ; X64-MASK-LABEL: test_i8_lshr_lshr_2:
 ; X64-MASK: # %bb.0:
 ; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT: andb $7, %dil
 ; X64-MASK-NEXT: leal (,%rdi,4), %eax
-; X64-MASK-NEXT: andb $28, %al
 ; X64-MASK-NEXT: # kill: def $al killed $al killed $eax
 ; X64-MASK-NEXT: retq
 ;
@@ -425,26 +425,13 @@
 ; X64-MASK-NEXT: # kill: def $ax killed $ax killed $eax
 ; X64-MASK-NEXT: retq
 ;
-; X64-SHIFT2-LABEL: test_i16_lshr_lshr_1:
-; X64-SHIFT2: # %bb.0:
-; X64-SHIFT2-NEXT: movl %edi, %eax
-; X64-SHIFT2-NEXT: shrl $2, %eax
-; X64-SHIFT2-NEXT: andl $2047, %eax # imm = 0x7FF
-; X64-SHIFT2-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-SHIFT2-NEXT: retq
-;
-; X64-TBM-LABEL: test_i16_lshr_lshr_1:
-; X64-TBM: # %bb.0:
-; X64-TBM-NEXT: bextrl $2818, %edi, %eax # imm = 0xB02
-; X64-TBM-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-TBM-NEXT: retq
-;
-; X64-BMI-LABEL: test_i16_lshr_lshr_1:
-; X64-BMI: # %bb.0:
-; X64-BMI-NEXT: movl $2818, %eax # imm = 0xB02
-; X64-BMI-NEXT: bextrl %eax, %edi, %eax
-; X64-BMI-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-BMI-NEXT: retq
+; X64-SHIFT-LABEL: test_i16_lshr_lshr_1:
+; X64-SHIFT: # %bb.0:
+; X64-SHIFT-NEXT: shll $3, %edi
+; X64-SHIFT-NEXT: movzwl %di, %eax
+; X64-SHIFT-NEXT: shrl $5, %eax
+; X64-SHIFT-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-SHIFT-NEXT: retq
 %1 = shl i16 %a0, 3
 %2 = lshr i16 %1, 5
 ret i16 %2
@@ -453,19 +440,27 @@
 define i16 @test_i16_lshr_lshr_2(i16 %a0) {
 ; X86-LABEL: test_i16_lshr_lshr_2:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: andl $2047, %eax # imm = 0x7FF
 ; X86-NEXT: shll $2, %eax
-; X86-NEXT: andl $8188, %eax # imm = 0x1FFC
 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
 ; X86-NEXT: retl
 ;
-; X64-LABEL: test_i16_lshr_lshr_2:
-; X64: # %bb.0:
-; X64-NEXT: # kill: def $edi killed $edi def $rdi
-; X64-NEXT: leal (,%rdi,4), %eax
-; X64-NEXT: andl $8188, %eax # imm = 0x1FFC
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: retq
+; X64-MASK-LABEL: test_i16_lshr_lshr_2:
+; X64-MASK: # %bb.0:
+; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT: andl $2047, %edi # imm = 0x7FF
+; X64-MASK-NEXT: leal (,%rdi,4), %eax
+; X64-MASK-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-MASK-NEXT: retq
+;
+; X64-SHIFT-LABEL: test_i16_lshr_lshr_2:
+; X64-SHIFT: # %bb.0:
+; X64-SHIFT-NEXT: shll $5, %edi
+; X64-SHIFT-NEXT: movzwl %di, %eax
+; X64-SHIFT-NEXT: shrl $3, %eax
+; X64-SHIFT-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-SHIFT-NEXT: retq
 %1 = shl i16 %a0, 5
 %2 = lshr i16 %1, 3
 ret i16 %2
@@ -517,16 +512,16 @@
 define i32 @test_i32_lshr_lshr_2(i32 %a0) {
 ; X86-LABEL: test_i32_lshr_lshr_2:
 ; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl $134217727, %eax # imm = 0x7FFFFFF
+; X86-NEXT: andl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: shll $2, %eax
-; X86-NEXT: andl $536870908, %eax # imm = 0x1FFFFFFC
 ; X86-NEXT: retl
 ;
 ; X64-MASK-LABEL: test_i32_lshr_lshr_2:
 ; X64-MASK: # %bb.0:
 ; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-MASK-NEXT: andl $134217727, %edi # imm = 0x7FFFFFF
 ; X64-MASK-NEXT: leal (,%rdi,4), %eax
-; X64-MASK-NEXT: andl $536870908, %eax # imm = 0x1FFFFFFC
 ; X64-MASK-NEXT: retq
 ;
 ; X64-SHIFT-LABEL: test_i32_lshr_lshr_2:
@@ -620,9 +615,9 @@
 ;
 ; X64-MASK-LABEL: test_i64_lshr_lshr_2:
 ; X64-MASK: # %bb.0:
-; X64-MASK-NEXT: leaq (,%rdi,4), %rcx
-; X64-MASK-NEXT: movabsq $2305843009213693948, %rax # imm = 0x1FFFFFFFFFFFFFFC
-; X64-MASK-NEXT: andq %rcx, %rax
+; X64-MASK-NEXT: movabsq $576460752303423487, %rax # imm = 0x7FFFFFFFFFFFFFF
+; X64-MASK-NEXT: andq %rdi, %rax
+; X64-MASK-NEXT: shlq $2, %rax
 ; X64-MASK-NEXT: retq
 ;
 ; X64-SHIFT-LABEL: test_i64_lshr_lshr_2:
@@ -635,3 +630,5 @@
 %2 = lshr i64 %1, 3
 ret i64 %2
 }
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; X64-BMI: {{.*}}
diff --git a/llvm/test/CodeGen/X86/sttni.ll b/llvm/test/CodeGen/X86/sttni.ll
--- a/llvm/test/CodeGen/X86/sttni.ll
+++ b/llvm/test/CodeGen/X86/sttni.ll
@@ -315,11 +315,10 @@
 ; X86-NEXT: jmp .LBB8_3
 ; X86-NEXT: .LBB8_2: # %compare
 ; X86-NEXT: movdqa %xmm0, (%esp)
-; X86-NEXT: addl %ecx, %ecx
-; X86-NEXT: andl $14, %ecx
-; X86-NEXT: movzwl (%esp,%ecx), %eax
+; X86-NEXT: andl $7, %ecx
+; X86-NEXT: movzwl (%esp,%ecx,2), %eax
 ; X86-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp)
-; X86-NEXT: subw 16(%esp,%ecx), %ax
+; X86-NEXT: subw 16(%esp,%ecx,2), %ax
 ; X86-NEXT: .LBB8_3: # %exit
 ; X86-NEXT: movzwl %ax, %eax
 ; X86-NEXT: movl %ebp, %esp
@@ -452,11 +451,10 @@
 ; X86-NEXT: jmp .LBB11_3
 ; X86-NEXT: .LBB11_2: # %compare
 ; X86-NEXT: movdqa %xmm1, (%esp)
-; X86-NEXT: addl %ecx, %ecx
-; X86-NEXT: andl $14, %ecx
-; X86-NEXT: movzwl (%esp,%ecx), %eax
+; X86-NEXT: andl $7, %ecx
+; X86-NEXT: movzwl (%esp,%ecx,2), %eax
 ; X86-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: subw 16(%esp,%ecx), %ax
+; X86-NEXT: subw 16(%esp,%ecx,2), %ax
 ; X86-NEXT: .LBB11_3: # %exit
 ; X86-NEXT: movzwl %ax, %eax
 ; X86-NEXT: leal -4(%ebp), %esp
@@ -772,11 +770,10 @@
 ; X86-NEXT: andl $-16, %esp
 ; X86-NEXT: subl $48, %esp
 ; X86-NEXT: movdqa %xmm0, (%esp)
-; X86-NEXT: addl %ecx, %ecx
-; X86-NEXT: andl $14, %ecx
-; X86-NEXT: movzwl (%esp,%ecx), %eax
+; X86-NEXT: andl $7, %ecx
+; X86-NEXT: movzwl (%esp,%ecx,2), %eax
 ; X86-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp)
-; X86-NEXT: subw 16(%esp,%ecx), %ax
+; X86-NEXT: subw 16(%esp,%ecx,2), %ax
 ; X86-NEXT: movl %ebp, %esp
 ; X86-NEXT: popl %ebp
 ; X86-NEXT: movzwl %ax, %eax
@@ -889,11 +886,10 @@
 ; X86-NEXT: jmp .LBB23_3
 ; X86-NEXT: .LBB23_2: # %compare
 ; X86-NEXT: movdqa %xmm1, (%esp)
-; X86-NEXT: addl %ecx, %ecx
-; X86-NEXT: andl $14, %ecx
-; X86-NEXT: movzwl (%esp,%ecx), %eax
+; X86-NEXT: andl $7, %ecx
+; X86-NEXT: movzwl (%esp,%ecx,2), %eax
 ; X86-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp)
-; X86-NEXT: subw 16(%esp,%ecx), %ax
+; X86-NEXT: subw 16(%esp,%ecx,2), %ax
 ; X86-NEXT: .LBB23_3: # %exit
 ; X86-NEXT: movzwl %ax, %eax
 ; X86-NEXT: movl %ebp, %esp
diff --git a/llvm/test/CodeGen/X86/tbm_patterns.ll b/llvm/test/CodeGen/X86/tbm_patterns.ll
--- a/llvm/test/CodeGen/X86/tbm_patterns.ll
+++ b/llvm/test/CodeGen/X86/tbm_patterns.ll
@@ -4,7 +4,8 @@
 define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind {
 ; CHECK-LABEL: test_x86_tbm_bextri_u32:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: bextrl $3076, %edi, %eax # imm = 0xC04
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: shrl $4, %eax
 ; CHECK-NEXT: retq
 %t0 = lshr i32 %a, 4
 %t1 = and i32 %t0, 4095
@@ -26,7 +27,8 @@
 define i32 @test_x86_tbm_bextri_u32_m(ptr nocapture %a) nounwind {
 ; CHECK-LABEL: test_x86_tbm_bextri_u32_m:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: bextrl $3076, (%rdi), %eax # imm = 0xC04
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: shrl $4, %eax
 ; CHECK-NEXT: retq
 %t0 = load i32, ptr %a
 %t1 = lshr i32 %t0, 4
@@ -37,7 +39,8 @@
 define i32 @test_x86_tbm_bextri_u32_z(i32 %a, i32 %b) nounwind {
 ; CHECK-LABEL: test_x86_tbm_bextri_u32_z:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: bextrl $3076, %edi, %eax # imm = 0xC04
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: shrl $4, %eax
 ; CHECK-NEXT: cmovel %esi, %eax
 ; CHECK-NEXT: retq
 %t0 = lshr i32 %a, 4
@@ -51,7 +54,7 @@
 ; CHECK-LABEL: test_x86_tbm_bextri_u32_z2:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; CHECK-NEXT: testl $65520, %edi # imm = 0xFFF0
 ; CHECK-NEXT: cmovnel %edx, %eax
 ; CHECK-NEXT: retq
 %t0 = lshr i32 %a, 4
@@ -65,7 +68,8 @@
 ; CHECK-LABEL: test_x86_tbm_bextri_u32_sle:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movl %esi, %eax
-; CHECK-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; CHECK-NEXT: movzwl %di, %ecx
+; CHECK-NEXT: shrl $4, %ecx
 ; CHECK-NEXT: testl %ecx, %ecx
 ; CHECK-NEXT: cmovgl %edx, %eax
 ; CHECK-NEXT: retq
@@ -79,7 +83,8 @@
 define i64 @test_x86_tbm_bextri_u64(i64 %a) nounwind {
 ; CHECK-LABEL: test_x86_tbm_bextri_u64:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: bextrl $3076, %edi, %eax # imm = 0xC04
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: shrl $4, %eax
 ; CHECK-NEXT: retq
 %t0 = lshr i64 %a, 4
 %t1 = and i64 %t0, 4095
@@ -101,7 +106,8 @@
 define i64 @test_x86_tbm_bextri_u64_m(ptr nocapture %a) nounwind {
 ; CHECK-LABEL: test_x86_tbm_bextri_u64_m:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: bextrl $3076, (%rdi), %eax # imm = 0xC04
+; CHECK-NEXT: movzwl (%rdi), %eax
+; CHECK-NEXT: shrl $4, %eax
 ; CHECK-NEXT: retq
 %t0 = load i64, ptr %a
 %t1 = lshr i64 %t0, 4
@@ -112,7 +118,8 @@
 define i64 @test_x86_tbm_bextri_u64_z(i64 %a, i64 %b) nounwind {
 ; CHECK-LABEL: test_x86_tbm_bextri_u64_z:
 ; CHECK: # %bb.0:
-; CHECK-NEXT: bextrl $3076, %edi, %eax # imm = 0xC04
+; CHECK-NEXT: movzwl %di, %eax
+; CHECK-NEXT: shrl $4, %eax
 ; CHECK-NEXT: cmoveq %rsi, %rax
 ; CHECK-NEXT: retq
 %t0 = lshr i64 %a, 4
@@ -126,7 +133,7 @@
 ; CHECK-LABEL: test_x86_tbm_bextri_u64_z2:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; CHECK-NEXT: testl $65520, %edi # imm = 0xFFF0
 ; CHECK-NEXT: cmovneq %rdx, %rax
 ; CHECK-NEXT: retq
 %t0 = lshr i64 %a, 4
@@ -140,7 +147,8 @@
 ; CHECK-LABEL: test_x86_tbm_bextri_u64_sle:
 ; CHECK: # %bb.0:
 ; CHECK-NEXT: movq %rsi, %rax
-; CHECK-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04
+; CHECK-NEXT: movzwl %di, %ecx
+; CHECK-NEXT: shrl $4, %ecx
 ; CHECK-NEXT: testq %rcx, %rcx
 ; CHECK-NEXT: cmovgq %rdx, %rax
 ; CHECK-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/udiv_fix.ll b/llvm/test/CodeGen/X86/udiv_fix.ll
--- a/llvm/test/CodeGen/X86/udiv_fix.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix.ll
@@ -90,9 +90,8 @@
 ; X86-LABEL: func3:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: addl %eax, %eax
-; X86-NEXT: movzbl %cl, %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: shll $4, %ecx
 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
 ; X86-NEXT: xorl %edx, %edx
diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
--- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll
+++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll
@@ -27,8 +27,7 @@
 ; X86-LABEL: func:
 ; X86: # %bb.0:
 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl %ax, %eax
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: shll $8, %eax
 ; X86-NEXT: xorl %edx, %edx
 ; X86-NEXT: divl %ecx
@@ -109,9 +108,8 @@
 ; X86-LABEL: func3:
 ; X86: # %bb.0:
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: addl %eax, %eax
-; X86-NEXT: movzbl %cl, %ecx
+; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: shll $4, %ecx
 ; X86-NEXT: # kill: def $ax killed $ax killed $eax
 ; X86-NEXT: xorl %edx, %edx
@@ -284,15 +282,14 @@
 ; X86-LABEL: func7:
 ; X86: # %bb.0:
 ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: shll $17, %edx
-; X86-NEXT: shrl $15, %ecx
-; X86-NEXT: andl $1, %ecx
+; X86-NEXT: shrl $15, %edx
+; X86-NEXT: shll $17, %ecx
 ; X86-NEXT: pushl $0
 ; X86-NEXT: pushl %eax
-; X86-NEXT: pushl %ecx
 ; X86-NEXT: pushl %edx
+; X86-NEXT: pushl %ecx
 ; X86-NEXT: calll __udivdi3
 ; X86-NEXT: addl $16, %esp
 ; X86-NEXT: cmpl $131071, %eax # imm = 0x1FFFF
diff --git a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
--- a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
+++ b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll
@@ -34,8 +34,8 @@
 ; X86-NEXT: imull $115043767, {{[0-9]+}}(%esp), %eax # imm = 0x6DB6DB7
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: shll $26, %ecx
-; X86-NEXT: andl $134217726, %eax # imm = 0x7FFFFFE
 ; X86-NEXT: shrl %eax
+; X86-NEXT: andl $67108863, %eax # imm = 0x3FFFFFF
 ; X86-NEXT: orl %ecx, %eax
 ; X86-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF
 ; X86-NEXT: cmpl $9586981, %eax # imm = 0x924925
@@ -47,8 +47,8 @@
 ; X64-NEXT: imull $115043767, %edi, %eax # imm = 0x6DB6DB7
 ; X64-NEXT: movl %eax, %ecx
 ; X64-NEXT: shll $26, %ecx
-; X64-NEXT: andl $134217726, %eax # imm = 0x7FFFFFE
 ; X64-NEXT: shrl %eax
+; X64-NEXT: andl $67108863, %eax # imm = 0x3FFFFFF
 ; X64-NEXT: orl %ecx, %eax
 ; X64-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF
 ; X64-NEXT: cmpl $9586981, %eax # imm = 0x924925
@@ -111,8 +111,8 @@
 ; X86-NEXT: imull $683, {{[0-9]+}}(%esp), %eax # imm = 0x2AB
 ; X86-NEXT: movl %eax, %ecx
 ; X86-NEXT: shll $10, %ecx
-; X86-NEXT: andl $2046, %eax # imm = 0x7FE
 ; X86-NEXT: shrl %eax
+; X86-NEXT: andl $1023, %eax # imm = 0x3FF
 ; X86-NEXT: orl %ecx, %eax
 ; X86-NEXT: andl $2047, %eax # imm = 0x7FF
 ; X86-NEXT: cmpl $342, %eax # imm = 0x156
diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll
--- a/llvm/test/CodeGen/X86/vselect.ll
+++ b/llvm/test/CodeGen/X86/vselect.ll
@@ -652,8 +652,8 @@
 ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
 ; SSE-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
 ; SSE-NEXT: movq %xmm0, %rax
-; SSE-NEXT: andl $1, %eax
-; SSE-NEXT: shlq $15, %rax
+; SSE-NEXT: shll $15, %eax
+; SSE-NEXT: movzwl %ax, %eax
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: vselect_any_extend_vector_inreg_crash:
@@ -661,8 +661,8 @@
 ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
 ; AVX-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
 ; AVX-NEXT: vmovq %xmm0, %rax
-; AVX-NEXT: andl $1, %eax
-; AVX-NEXT: shlq $15, %rax
+; AVX-NEXT: shll $15, %eax
+; AVX-NEXT: movzwl %ax, %eax
 ; AVX-NEXT: retq
 0:
 %1 = load <8 x i8>, ptr %x
diff --git a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll
--- a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll
+++ b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll
@@ -7,8 +7,8 @@
 ; X86: # %bb.0: # %entry
 ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT: movzbl (%eax), %eax
+; X86-NEXT: andl $15, %eax
 ; X86-NEXT: shll $2, %eax
-; X86-NEXT: andl $60, %eax
 ; X86-NEXT: xorl %edx, %edx
 ; X86-NEXT: retl
 ;