diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -4013,6 +4013,21 @@ return true; } + /// Return true if it is profitable to combine the two shifts OuterShift and + /// InnerShift across Op at Level. For example fold (shl (and (srl x, c1), c3), + /// c2) -> (and (srl x, (sub c1, c2)), (c3 << c2)). + /// + /// @param OuterShift the outer shift node + /// @param Op the node to combine the shifts across + /// @param InnerShift the inner shift node + /// @param Level the current DAGCombine legalization level. + virtual bool isDesirableToCombineShiftsAcross(const SDNode *OuterShift, const SDNode *Op, const SDNode *InnerShift, CombineLevel Level) const { + return false; + } + /// Return true if the target has native support for the specified value type /// and it is 'desirable' to use the type for the given node type. e.g. On x86 /// i16 is legal, but undesirable since i16 instruction encodings are longer diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -9282,7 +9282,8 @@ } } - if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) { + if (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA || + N0.getOpcode() == ISD::AND) { auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { const APInt &LHSC = LHS->getAPIntValue(); @@ -9295,7 +9296,7 @@ // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 >= C2 - if (N0->getFlags().hasExact()) { + if (N0.getOpcode() != ISD::AND && N0->getFlags().hasExact()) { if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, /*AllowUndefs*/ false, /*AllowTypeMismatch*/ true)) { @@ -9318,6 +9319,9 @@ // folding this will increase the total number of instructions. if (N0.getOpcode() == ISD::SRL && (N0.getOperand(1) == N1 || N0.hasOneUse()) && + // TODO: If there is already an existing mask (either use of N or + // operand of N0) then we should do this fold regardless of + // `shouldFoldConstantShiftPairToMask`.
TLI.shouldFoldConstantShiftPairToMask(N, Level)) { if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, /*AllowUndefs*/ false, @@ -9341,6 +9345,39 @@ return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); } } + + // fold (shl (and (srl x, c1), c3), c2) -> + // (and (srl x, (sub c1, c2)), (c3 << c2)) or + // (and (shl x, (sub c2, c1)), (c3 << c2)) + if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) { + if (dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::SRL && N00.hasOneUse() && + TLI.isDesirableToCombineShiftsAcross(N, N0.getNode(), N00.getNode(), + Level)) { + SDLoc DL(N); + SDValue N001 = DAG.getZExtOrTrunc(N00.getOperand(1), DL, ShiftVT); + if (ISD::matchBinaryPredicate(N1, N001, MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N001, N1); + SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(1), N1); + SDValue Shift = + DAG.getNode(ISD::SRL, DL, VT, N00.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } + if (ISD::matchBinaryPredicate(N001, N1, MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N001); + SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(1), N1); + SDValue Shift = + DAG.getNode(ISD::SHL, DL, VT, N00.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } + } + } + } } // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) @@ -9825,9 +9862,7 @@ // fold (srl (shl x, c1), c2) -> (and (shl x, (sub c1, c2), MASK) or // (and (srl x, (sub c2, c1), MASK) - if (N0.getOpcode() == ISD::SHL && - (N0.getOperand(1) == N1 || N0->hasOneUse()) && - TLI.shouldFoldConstantShiftPairToMask(N, Level)) { + if (N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::AND) { auto MatchShiftAmount = [OpSizeInBits](ConstantSDNode *LHS, ConstantSDNode *RHS) { const APInt &LHSC = LHS->getAPIntValue(); @@ -9835,28 +9870,66 @@ return LHSC.ult(OpSizeInBits) && RHSC.ult(OpSizeInBits) && LHSC.getZExtValue() <= RHSC.getZExtValue(); }; - if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, - /*AllowUndefs*/ false, - /*AllowTypeMismatch*/ true)) { - SDLoc DL(N); - SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); - SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); - SDValue Mask = DAG.getAllOnesConstant(DL, VT); - Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01); - Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff); - SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff); - return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); - } - if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, - /*AllowUndefs*/ false, - /*AllowTypeMismatch*/ true)) { - SDLoc DL(N); - SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); - SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); - SDValue Mask = DAG.getAllOnesConstant(DL, VT); - Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1); - SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff); - return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + SDLoc DL(N); + if (N0.getOpcode() == ISD::SHL && + (N0.getOperand(1) == N1 || N0->hasOneUse()) && + // TODO: If there is already an existing mask (either use of N or + // operand of N0) then we should do this fold regardless of + // `shouldFoldConstantShiftPairToMask`.
+ TLI.shouldFoldConstantShiftPairToMask(N, Level)) { + if (ISD::matchBinaryPredicate(N1, N0.getOperand(1), MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N01, N1); + SDValue Mask = DAG.getAllOnesConstant(DL, VT); + Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N01); + Mask = DAG.getNode(ISD::SHL, DL, VT, Mask, Diff); + SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } + if (ISD::matchBinaryPredicate(N0.getOperand(1), N1, MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue N01 = DAG.getZExtOrTrunc(N0.getOperand(1), DL, ShiftVT); + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N01); + SDValue Mask = DAG.getAllOnesConstant(DL, VT); + Mask = DAG.getNode(ISD::SRL, DL, VT, Mask, N1); + SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } + } + + // fold (srl (and (shl x, c1), c3), c2) -> + // (and (shl x, (sub c1, c2)), (c3 >> c2)) or + // (and (srl x, (sub c2, c1)), (c3 >> c2)) + if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) { + if (dyn_cast<ConstantSDNode>(N0.getOperand(1))) { + SDValue N00 = N0.getOperand(0); + if (N00.getOpcode() == ISD::SHL && N00.hasOneUse() && + TLI.isDesirableToCombineShiftsAcross(N, N0.getNode(), N00.getNode(), + Level)) { + SDValue N001 = DAG.getZExtOrTrunc(N00.getOperand(1), DL, ShiftVT); + if (ISD::matchBinaryPredicate(N1, N001, MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N001, N1); + SDValue Mask = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(1), N1); + SDValue Shift = + DAG.getNode(ISD::SHL, DL, VT, N00.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } + if (ISD::matchBinaryPredicate(N001, N1, MatchShiftAmount, + /*AllowUndefs*/ false, + /*AllowTypeMismatch*/ true)) { + SDValue Diff = DAG.getNode(ISD::SUB, DL, ShiftVT, N1, N001); + SDValue Mask = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(1), N1); + SDValue Shift = + DAG.getNode(ISD::SRL, DL, VT, N00.getOperand(0), Diff); + return DAG.getNode(ISD::AND, DL, VT, Shift, Mask); + } + } + } } } diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -1040,6 +1040,14 @@ SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override; + bool isDesirableToCombineShiftsAcross(const SDNode *OuterShift, + const SDNode *Op, + const SDNode *InnerShift, + CombineLevel Level) const override; + + bool isDesirableToCommuteWithShift(const SDNode *N, + CombineLevel Level) const override; + /// Return true if the target has native support for /// the specified value type and it is 'desirable' to use the type for the /// given node type. e.g. On x86 i16 is legal, but undesirable since i16 diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37752,6 +37752,7 @@ return false; // Replace the constant with the zero extend mask. + // TODO: We should also look into sign-extending here.
SDLoc DL(Op); SDValue NewC = TLO.DAG.getConstant(ZeroExtendMask, DL, VT); SDValue NewOp = TLO.DAG.getNode(ISD::AND, DL, VT, Op.getOperand(0), NewC); @@ -47726,7 +47727,277 @@ return DAG.getNode(ExtOpc, DL, VT, Mulh); } -static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG) { +static unsigned findImplicitModUsage(SDNode *N, + SmallPtrSet<SDNode *, 4> *SeenNodes) { + if (!SeenNodes->insert(N).second) + return 0; + for (auto U : N->uses()) { + unsigned Opc = U->getOpcode(); + if ((Opc == ISD::SHL || Opc == ISD::SRL || Opc == ISD::SRA || + Opc == X86ISD::BT) && + N == U->getOperand(1).getNode()) + return U->getOperand(0).getValueSizeInBits() - 1; + + if (ISD::isExtOpcode(Opc) || Opc == ISD::TRUNCATE || Opc == ISD::BITCAST) { + if (unsigned ModSize = findImplicitModUsage(U, SeenNodes)) + return ModSize; + } + } + + return 0; +} + +static unsigned getImplicitModMask(SDNode *N) { + SmallPtrSet<SDNode *, 4> SeenNodes; + return findImplicitModUsage(N, &SeenNodes); +} + +static bool isMaskAlreadyIdeal(unsigned DesiredMask, bool PreserveANDN, + const APInt &AndMask) { + if (PreserveANDN && AndMask.getSignificantBits() <= 32) + return true; + + if (DesiredMask) { + // Check if we already have the desired (shift-amount) mask. + if (AndMask == DesiredMask) + return true; + if ((AndMask | (AndMask - 1)) == DesiredMask) + return true; + } else if (AndMask == UINT8_MAX || AndMask == UINT16_MAX || + AndMask == UINT32_MAX) { + // Already have ideal mask + return true; + } + + return false; +} + +static bool isNewAndMaskPreferable(unsigned DesiredMask, bool PreserveANDN, + bool PreferShrunk, const APInt &OldAndMask, + const APInt &NewAndMask) { + // Several conditions for whether we want to do the swap: + // 1. NewAndMask == DesiredMask + // 2. PreserveANDN and we save a movabs + // 3. !PreserveANDN and we are able to create `movl/movzwl/movzbl` form mask. + // 4. !PreserveANDN and we are able to shrink the mask (potentially saving + // `movabs` or getting imm8 form). + + // Case 1: NewAndMask == DesiredMask + if (DesiredMask && NewAndMask == DesiredMask) + return true; + + // Case 2: PreserveANDN and we save a movabs + if (NewAndMask.getSignificantBits() <= 32 && + OldAndMask.getSignificantBits() > 32) + return true; + + if (PreserveANDN) + return false; + + // Case 3: We are able to create `movl/movzwl/movzbl` form mask. + if (NewAndMask.isMask()) { + if (NewAndMask == UINT8_MAX || NewAndMask == UINT16_MAX || + NewAndMask == UINT32_MAX) + return true; + } + + // Case 4: We are able to shrink the mask. + return PreferShrunk && + NewAndMask.getSignificantBits() < OldAndMask.getSignificantBits(); +} + +// Try re-ordering (srl/shl (and X, C0), C1) -> (and (srl/shl X, C1), C0 +// >>/<< C1) if the new mask (C0 >>/<< C1) can be more efficiently represented +// than the original C0. Note this only tries to optimize by re-ordering. +static SDValue +combineLogicalShiftWithAnd(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + // Only do this on the last DAG combine as it can interfere with other + // combines. This is also necessary to avoid an infinite loop between this + // and `DAGCombiner::visitShiftByConstant`. + if (!DCI.isAfterLegalizeDAG()) + return SDValue(); + + SDNode *AndOp; + SDValue RawVal; + unsigned Opc = N->getOpcode(); + bool PreserveANDN = false; + assert((Opc == ISD::SRL || Opc == ISD::SHL) && + "Invalid node to combine shift with and"); + + // Get the shift amount; only proceed if it's constant.
+ auto *ShiftC = dyn_cast<ConstantSDNode>(N->getOperand(1)); + if (!ShiftC) + return SDValue(); + + // Get AndOp and RawVal (RawVal being the value that is shifted, assuming + // no `and`). + AndOp = N->getOperand(0).getNode(); + if (AndOp->getOpcode() != ISD::AND || !AndOp->hasOneUse()) + return SDValue(); + + // Get the `and` mask and RawVal if we didn't get it earlier. + ConstantSDNode *AndC; + for (unsigned Idx = 0; Idx < 2; ++Idx) { + AndC = dyn_cast<ConstantSDNode>(AndOp->getOperand(Idx)); + if (AndC) { + RawVal = AndOp->getOperand(1 - Idx); + break; + } + } + + // Only proceed if we have a constant mask. + if (!AndC) + return SDValue(); + + EVT VT = RawVal.getValueType(); + // TODO: Makes sense to do this on vector types if it allows us to use a mask + // that's easier to create. Note: if this changes to include vector types, + // update `isDesirableToCommuteWithShift`. + if (!VT.isScalarInteger()) + return SDValue(); + + SDLoc DL(N); + + // Check if the `and` is used as a shift-amount. If so, try to generate a + // shift-amount mask (which can be eliminated). + unsigned DesiredMask = getImplicitModMask(N); + APInt AndMask = AndC->getAPIntValue(); + unsigned ShiftCnt = ShiftC->getZExtValue(); + + // Don't reorder if we have an `andn` pattern unless we will be able to + // eliminate a `movabs`. + PreserveANDN = + isBitwiseNot(RawVal) && DAG.getTargetLoweringInfo().hasAndNot(RawVal); + + // Check if mask is already good. Either in `movl/movzwl/movzbl` form, setup + // for `andn` or already equal to `DesiredMask`. + if (isMaskAlreadyIdeal(DesiredMask, PreserveANDN, AndMask)) + return SDValue(); + + // Determine Mask if we swap order of `srl/shl` and `and`. + APInt NewAndMask; + if (Opc == ISD::SHL) { + NewAndMask = AndMask.shl(ShiftCnt); + NewAndMask.setLowBits(ShiftCnt); + // Set high bits for fair comparison. + APInt SExtMask = AndMask; + SExtMask.setHighBits(ShiftCnt); + if (SExtMask.getSignificantBits() < AndMask.getSignificantBits()) + AndMask = SExtMask; + } else { + // TODO: This can/should be `ashr`, but there are several other places that + // undo sign-extended constants so making a replacement that relies on + // sign-extension can end up as a net negative. + NewAndMask = AndMask.lshr(ShiftCnt); + } + + if (isNewAndMaskPreferable(DesiredMask, PreserveANDN, + /* PreferShrunk */ true, AndMask, NewAndMask)) + return DAG.getNode( + ISD::AND, DL, VT, + DAG.getNode(Opc, DL, VT, RawVal, DAG.getConstant(ShiftCnt, DL, VT)), + DAG.getConstant(NewAndMask, DL, VT)); + + return SDValue(); +} + +// Try re-ordering (and (srl/shl X, C0), C1) -> (srl/shl (and X, C1 <</>> +// C0), C0) if the new mask (C1 <</>> C0) can be more efficiently represented +// than the original C1. Note this only tries to optimize by re-ordering. +static SDValue +combineAndWithLogicalShift(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { + // Only do this on the last DAG combine as it can interfere with other + // combines. This is also necessary to avoid an infinite loop between this + // and `DAGCombiner::visitShiftByConstant`. + if (!DCI.isAfterLegalizeDAG()) + return SDValue(); + + SDNode *ShiftOp; + SDValue RawVal; + assert(N->getOpcode() == ISD::AND && + "Invalid node to combine shift with and"); + + // Get ShiftOp, AndC and RawVal (RawVal being the value that is shifted, + // assuming no `and`).
+ unsigned Idx; + ConstantSDNode *AndC; + for (Idx = 0; Idx < 2; ++Idx) { + ShiftOp = N->getOperand(Idx).getNode(); + if ((ShiftOp->getOpcode() == ISD::SRL || + ShiftOp->getOpcode() == ISD::SHL) && + ShiftOp->hasOneUse()) { + AndC = dyn_cast<ConstantSDNode>(N->getOperand(1 - Idx)); + RawVal = ShiftOp->getOperand(0); + break; + } + } + + // We either could not find a shift or the and mask is non-constant. + if (Idx == 2 || !AndC) + return SDValue(); + + // Only proceed if shift-amount is constant. + auto *ShiftC = dyn_cast<ConstantSDNode>(ShiftOp->getOperand(1)); + if (!ShiftC) + return SDValue(); + + EVT VT = RawVal.getValueType(); + // TODO: Makes sense to do this on vector types if it allows us to use a + // mask that's easier to create. + if (!VT.isScalarInteger()) + return SDValue(); + + SDLoc DL(N); + unsigned ShiftCnt = ShiftC->getZExtValue(); + unsigned DesiredMask = getImplicitModMask(N); + APInt AndMask = AndC->getAPIntValue(); + + // This is a hack. Avoid reordering if RawVal is potentially alive after the + // and/shift and the shift can be emitted as a LEA. This avoids causing a + // slight scheduling regression by emitting mov + shift instead of just LEA. + // Note: it's possible to save some move instructions by adding similar logic + // in 'combineLogicalShiftWithAnd'. + // TODO: Add a pass after ISel to reorder shifts if they can be transformed to + // LEA to save move instructions and remove this hack. + bool PreferShrunk = !(ShiftOp->getOpcode() == ISD::SHL && ShiftCnt <= 3 && + !RawVal.hasOneUse()); + + // Check if mask is already good. Either in `movl/movzwl/movzbl` form, setup + // for `andn` or already equal to `DesiredMask`. + if (isMaskAlreadyIdeal(DesiredMask, /* PreserveANDN */ false, AndMask)) + return SDValue(); + + // Determine Mask if we swap order of `srl/shl` and `and`. + APInt NewAndMask; + if (ShiftOp->getOpcode() == ISD::SHL) { + // TODO: This can/should be `ashr`, but there are several other places that + // undo sign-extended constants so making a replacement that relies on + // sign-extension can end up as a net negative. + NewAndMask = AndMask.lshr(ShiftCnt); + } else { + NewAndMask = AndMask.shl(ShiftCnt); + NewAndMask.setLowBits(ShiftCnt); + // Set high bits for fair comparison. + APInt SExtMask = AndMask; + SExtMask.setHighBits(ShiftCnt); + if (SExtMask.getSignificantBits() < AndMask.getSignificantBits()) + AndMask = SExtMask; + } + + if (isNewAndMaskPreferable(DesiredMask, /* PreserveANDN */ false, + PreferShrunk, AndMask, NewAndMask)) + return DAG.getNode(ShiftOp->getOpcode(), DL, VT, + DAG.getNode(ISD::AND, DL, VT, RawVal, + DAG.getConstant(NewAndMask, DL, VT)), + DAG.getConstant(ShiftCnt, DL, VT)); + + return SDValue(); +} + +static SDValue combineShiftLeft(SDNode *N, SelectionDAG &DAG, + TargetLowering::DAGCombinerInfo &DCI) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); @@ -47768,6 +48039,9 @@ } } + if (SDValue V = combineLogicalShiftWithAnd(N, DAG, DCI)) + return V; + return SDValue(); } @@ -47830,10 +48104,6 @@ static SDValue combineShiftRightLogical(SDNode *N, SelectionDAG &DAG, TargetLowering::DAGCombinerInfo &DCI, const X86Subtarget &Subtarget) { - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - EVT VT = N0.getValueType(); - if (SDValue V = combineShiftToPMULH(N, DAG, Subtarget)) return V; @@ -47842,41 +48112,9 @@ if (!DCI.isAfterLegalizeDAG()) return SDValue(); - // Try to improve a sequence of srl (and X, C1), C2 by inverting the order.
- // TODO: This is a generic DAG combine that became an x86-only combine to - // avoid shortcomings in other folds such as bswap, bit-test ('bt'), and - // and-not ('andn'). - if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) - return SDValue(); - - auto *ShiftC = dyn_cast<ConstantSDNode>(N1); - auto *AndC = dyn_cast<ConstantSDNode>(N0.getOperand(1)); - if (!ShiftC || !AndC) - return SDValue(); - - // If we can shrink the constant mask below 8-bits or 32-bits, then this - // transform should reduce code size. It may also enable secondary transforms - // from improved known-bits analysis or instruction selection. - APInt MaskVal = AndC->getAPIntValue(); - - // If this can be matched by a zero extend, don't optimize. - if (MaskVal.isMask()) { - unsigned TO = MaskVal.countTrailingOnes(); - if (TO >= 8 && isPowerOf2_32(TO)) - return SDValue(); - } + if (SDValue V = combineLogicalShiftWithAnd(N, DAG, DCI)) + return V; - APInt NewMaskVal = MaskVal.lshr(ShiftC->getAPIntValue()); - unsigned OldMaskSize = MaskVal.getMinSignedBits(); - unsigned NewMaskSize = NewMaskVal.getMinSignedBits(); - if ((OldMaskSize > 8 && NewMaskSize <= 8) || - (OldMaskSize > 32 && NewMaskSize <= 32)) { - // srl (and X, AndC), ShiftC --> and (srl X, ShiftC), (AndC >> ShiftC) - SDLoc DL(N); - SDValue NewMask = DAG.getConstant(NewMaskVal, DL, VT); - SDValue NewShift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), N1); - return DAG.getNode(ISD::AND, DL, VT, NewShift, NewMask); - } return SDValue(); } @@ -49324,6 +49562,9 @@ } } + if (SDValue V = combineAndWithLogicalShift(N, DAG, DCI)) + return V; + return SDValue(); } @@ -56376,7 +56617,7 @@ case X86ISD::SBB: return combineSBB(N, DAG); case X86ISD::ADC: return combineADC(N, DAG, DCI); case ISD::MUL: return combineMul(N, DAG, DCI, Subtarget); - case ISD::SHL: return combineShiftLeft(N, DAG); + case ISD::SHL: return combineShiftLeft(N, DAG, DCI); case ISD::SRA: return combineShiftRightArithmetic(N, DAG, Subtarget); case ISD::SRL: return combineShiftRightLogical(N, DAG, DCI, Subtarget); case ISD::AND: return combineAnd(N, DAG, DCI, Subtarget); @@ -56531,6 +56772,47 @@ return SDValue(); } +// At the moment this is always worth it to save the instruction. There may be +// some edge cases where a small shift left could fold with complex addressing, +// but so far none have been seen. +bool X86TargetLowering::isDesirableToCombineShiftsAcross( + const SDNode *OuterShift, const SDNode *Op, const SDNode *InnerShift, + CombineLevel Level) const { + (void)OuterShift; + (void)Op; + (void)InnerShift; + (void)Level; + return true; +} + +// In `combineLogicalShiftWithAnd` we reorder (shift (and x, c0), c1) -> (and +// (shift x, c1), c0 >>/<< c1) if the new constant `c0 >>/<< c1` is "better" +// than the original `c0`. This is done after legalize DAG, so if we are at +// that level don't allow the commute if it's a pattern we already handle +// (otherwise we can either get an infinite loop or worse codegen).
+bool X86TargetLowering::isDesirableToCommuteWithShift( + const SDNode *N, CombineLevel Level) const { + if (Level < AfterLegalizeDAG) + return true; + + if (N->getOpcode() != ISD::SRL && N->getOpcode() != ISD::SHL) + return true; + if (dyn_cast<ConstantSDNode>(N->getOperand(1)) == nullptr) + return true; + + SDNode *AndOp = N->getOperand(0).getNode(); + if (AndOp->getOpcode() != ISD::AND || !AndOp->hasOneUse() || + !AndOp->getValueType(0).isScalarInteger()) + return true; + + for (unsigned Idx = 0; Idx < 2; ++Idx) { + if (dyn_cast<ConstantSDNode>(AndOp->getOperand(Idx)) != nullptr) + return false; + } + + return true; +} + bool X86TargetLowering::isTypeDesirableForOp(unsigned Opc, EVT VT) const { if (!isTypeLegal(VT)) return false; diff --git a/llvm/test/CodeGen/X86/avx512-calling-conv.ll b/llvm/test/CodeGen/X86/avx512-calling-conv.ll --- a/llvm/test/CodeGen/X86/avx512-calling-conv.ll +++ b/llvm/test/CodeGen/X86/avx512-calling-conv.ll @@ -959,12 +959,12 @@ ; KNL-NEXT: orl %r9d, %r8d ; KNL-NEXT: andl $1, %r10d ; KNL-NEXT: shll $6, %r10d -; KNL-NEXT: andl $1, %r11d ; KNL-NEXT: shll $7, %r11d -; KNL-NEXT: orl %r10d, %r11d +; KNL-NEXT: movzbl %r11b, %ecx +; KNL-NEXT: orl %r10d, %ecx ; KNL-NEXT: andl $1, %ebx ; KNL-NEXT: shll $8, %ebx -; KNL-NEXT: orl %r11d, %ebx +; KNL-NEXT: orl %ecx, %ebx ; KNL-NEXT: andl $1, %r14d ; KNL-NEXT: shll $9, %r14d ; KNL-NEXT: orl %ebx, %r14d @@ -983,11 +983,11 @@ ; KNL-NEXT: andl $1, %edx ; KNL-NEXT: shll $14, %edx ; KNL-NEXT: orl %r13d, %edx -; KNL-NEXT: andl $1, %esi ; KNL-NEXT: shll $15, %esi -; KNL-NEXT: orl %edx, %esi -; KNL-NEXT: orl %ebp, %esi -; KNL-NEXT: movw %si, (%rax) +; KNL-NEXT: movzwl %si, %ecx +; KNL-NEXT: orl %edx, %ecx +; KNL-NEXT: orl %ebp, %ecx +; KNL-NEXT: movw %cx, (%rax) ; KNL-NEXT: popq %rbx ; KNL-NEXT: popq %r12 ; KNL-NEXT: popq %r13 @@ -1272,12 +1272,12 @@ ; SKX-NEXT: orl %r9d, %r8d ; SKX-NEXT: andl $1, %r10d ; SKX-NEXT: shll $6, %r10d -; SKX-NEXT: andl $1, %r11d ; SKX-NEXT: shll $7, %r11d -; SKX-NEXT: orl %r10d, %r11d +; SKX-NEXT: movzbl %r11b, %ecx +; SKX-NEXT: orl %r10d, %ecx ; SKX-NEXT: andl $1, %ebx ; SKX-NEXT: shll $8, %ebx -; SKX-NEXT: orl %r11d, %ebx +; SKX-NEXT: orl %ecx, %ebx ; SKX-NEXT: andl $1, %r14d ; SKX-NEXT: shll $9, %r14d ; SKX-NEXT: orl %ebx, %r14d @@ -1296,11 +1296,11 @@ ; SKX-NEXT: andl $1, %edx ; SKX-NEXT: shll $14, %edx ; SKX-NEXT: orl %r13d, %edx -; SKX-NEXT: andl $1, %esi ; SKX-NEXT: shll $15, %esi -; SKX-NEXT: orl %edx, %esi -; SKX-NEXT: orl %ebp, %esi -; SKX-NEXT: movw %si, (%rax) +; SKX-NEXT: movzwl %si, %ecx +; SKX-NEXT: orl %edx, %ecx +; SKX-NEXT: orl %ebp, %ecx +; SKX-NEXT: movw %cx, (%rax) ; SKX-NEXT: popq %rbx ; SKX-NEXT: popq %r12 ; SKX-NEXT: popq %r13 @@ -1551,84 +1551,85 @@ ; KNL_X32-NEXT: kandw %k2, %k0, %k0 ; KNL_X32-NEXT: kmovw %eax, %k2 ; KNL_X32-NEXT: kandw %k1, %k2, %k1 -; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %ebp ; KNL_X32-NEXT: kmovw %k1, %ebx ; KNL_X32-NEXT: kshiftrw $1, %k0, %k1 -; KNL_X32-NEXT: kmovw %k1, %ebp +; KNL_X32-NEXT: kmovw %k1, %eax ; KNL_X32-NEXT: kshiftrw $2, %k0, %k1 -; KNL_X32-NEXT: kmovw %k1, %esi -; KNL_X32-NEXT: kshiftrw $3, %k0, %k1 ; KNL_X32-NEXT: kmovw %k1, %edi +; KNL_X32-NEXT: kshiftrw $3, %k0, %k1 +; KNL_X32-NEXT: kmovw %k1, %esi ; KNL_X32-NEXT: kshiftrw $4, %k0, %k1 ; KNL_X32-NEXT: kmovw %k1, %edx ; KNL_X32-NEXT: kshiftrw $5, %k0, %k1 ; KNL_X32-NEXT: kmovw %k1, %ecx ; KNL_X32-NEXT: kshiftrw $6, %k0, %k1 ; KNL_X32-NEXT: andl $1, %ebx -; KNL_X32-NEXT: movb %bl, 2(%eax) +; KNL_X32-NEXT: movb %bl, 2(%ebp) ; KNL_X32-NEXT: kmovw %k0, %ebx ; KNL_X32-NEXT: andl $1, 
%ebx -; KNL_X32-NEXT: andl $1, %ebp -; KNL_X32-NEXT: leal (%ebx,%ebp,2), %ebx -; KNL_X32-NEXT: kmovw %k1, %ebp +; KNL_X32-NEXT: andl $1, %eax +; KNL_X32-NEXT: leal (%ebx,%eax,2), %eax +; KNL_X32-NEXT: kmovw %k1, %ebx ; KNL_X32-NEXT: kshiftrw $7, %k0, %k1 +; KNL_X32-NEXT: andl $1, %edi +; KNL_X32-NEXT: leal (%eax,%edi,4), %edi +; KNL_X32-NEXT: kmovw %k1, %eax +; KNL_X32-NEXT: kshiftrw $8, %k0, %k1 ; KNL_X32-NEXT: andl $1, %esi -; KNL_X32-NEXT: leal (%ebx,%esi,4), %ebx +; KNL_X32-NEXT: leal (%edi,%esi,8), %edi ; KNL_X32-NEXT: kmovw %k1, %esi -; KNL_X32-NEXT: kshiftrw $8, %k0, %k1 -; KNL_X32-NEXT: andl $1, %edi -; KNL_X32-NEXT: leal (%ebx,%edi,8), %ebx -; KNL_X32-NEXT: kmovw %k1, %edi ; KNL_X32-NEXT: kshiftrw $9, %k0, %k1 ; KNL_X32-NEXT: andl $1, %edx ; KNL_X32-NEXT: shll $4, %edx -; KNL_X32-NEXT: orl %ebx, %edx -; KNL_X32-NEXT: kmovw %k1, %ebx +; KNL_X32-NEXT: orl %edi, %edx +; KNL_X32-NEXT: kmovw %k1, %edi ; KNL_X32-NEXT: kshiftrw $10, %k0, %k1 ; KNL_X32-NEXT: andl $1, %ecx ; KNL_X32-NEXT: shll $5, %ecx ; KNL_X32-NEXT: orl %edx, %ecx ; KNL_X32-NEXT: kmovw %k1, %edx ; KNL_X32-NEXT: kshiftrw $11, %k0, %k1 -; KNL_X32-NEXT: andl $1, %ebp -; KNL_X32-NEXT: shll $6, %ebp +; KNL_X32-NEXT: andl $1, %ebx +; KNL_X32-NEXT: shll $6, %ebx +; KNL_X32-NEXT: shll $7, %eax +; KNL_X32-NEXT: movzbl %al, %eax +; KNL_X32-NEXT: orl %ebx, %eax +; KNL_X32-NEXT: kmovw %k1, %ebx +; KNL_X32-NEXT: kshiftrw $12, %k0, %k1 ; KNL_X32-NEXT: andl $1, %esi -; KNL_X32-NEXT: shll $7, %esi -; KNL_X32-NEXT: orl %ebp, %esi +; KNL_X32-NEXT: shll $8, %esi +; KNL_X32-NEXT: orl %eax, %esi ; KNL_X32-NEXT: kmovw %k1, %ebp -; KNL_X32-NEXT: kshiftrw $12, %k0, %k1 +; KNL_X32-NEXT: kshiftrw $13, %k0, %k1 ; KNL_X32-NEXT: andl $1, %edi -; KNL_X32-NEXT: shll $8, %edi +; KNL_X32-NEXT: shll $9, %edi ; KNL_X32-NEXT: orl %esi, %edi -; KNL_X32-NEXT: kmovw %k1, %esi -; KNL_X32-NEXT: kshiftrw $13, %k0, %k1 -; KNL_X32-NEXT: andl $1, %ebx -; KNL_X32-NEXT: shll $9, %ebx -; KNL_X32-NEXT: orl %edi, %ebx -; KNL_X32-NEXT: kmovw %k1, %edi +; KNL_X32-NEXT: kmovw %k1, %eax ; KNL_X32-NEXT: kshiftrw $14, %k0, %k1 ; KNL_X32-NEXT: andl $1, %edx ; KNL_X32-NEXT: shll $10, %edx -; KNL_X32-NEXT: orl %ebx, %edx -; KNL_X32-NEXT: kmovw %k1, %ebx +; KNL_X32-NEXT: orl %edi, %edx +; KNL_X32-NEXT: kmovw %k1, %esi ; KNL_X32-NEXT: kshiftrw $15, %k0, %k0 ; KNL_X32-NEXT: orl %ecx, %edx ; KNL_X32-NEXT: kmovw %k0, %ecx +; KNL_X32-NEXT: andl $1, %ebx +; KNL_X32-NEXT: shll $11, %ebx ; KNL_X32-NEXT: andl $1, %ebp -; KNL_X32-NEXT: shll $11, %ebp +; KNL_X32-NEXT: shll $12, %ebp +; KNL_X32-NEXT: orl %ebx, %ebp +; KNL_X32-NEXT: andl $1, %eax +; KNL_X32-NEXT: shll $13, %eax +; KNL_X32-NEXT: orl %ebp, %eax ; KNL_X32-NEXT: andl $1, %esi -; KNL_X32-NEXT: shll $12, %esi -; KNL_X32-NEXT: orl %ebp, %esi -; KNL_X32-NEXT: andl $1, %edi -; KNL_X32-NEXT: shll $13, %edi -; KNL_X32-NEXT: orl %esi, %edi -; KNL_X32-NEXT: andl $1, %ebx -; KNL_X32-NEXT: shll $14, %ebx -; KNL_X32-NEXT: orl %edi, %ebx -; KNL_X32-NEXT: andl $1, %ecx +; KNL_X32-NEXT: shll $14, %esi +; KNL_X32-NEXT: orl %eax, %esi ; KNL_X32-NEXT: shll $15, %ecx -; KNL_X32-NEXT: orl %ebx, %ecx +; KNL_X32-NEXT: movzwl %cx, %ecx +; KNL_X32-NEXT: orl %esi, %ecx ; KNL_X32-NEXT: orl %edx, %ecx +; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax ; KNL_X32-NEXT: movw %cx, (%eax) ; KNL_X32-NEXT: addl $16, %esp ; KNL_X32-NEXT: popl %esi @@ -1913,12 +1914,12 @@ ; FASTISEL-NEXT: orl %r9d, %r8d ; FASTISEL-NEXT: andl $1, %r10d ; FASTISEL-NEXT: shll $6, %r10d -; FASTISEL-NEXT: andl $1, %r11d ; FASTISEL-NEXT: shll $7, %r11d -; FASTISEL-NEXT: orl %r10d, %r11d +; 
FASTISEL-NEXT: movzbl %r11b, %ecx +; FASTISEL-NEXT: orl %r10d, %ecx ; FASTISEL-NEXT: andl $1, %ebx ; FASTISEL-NEXT: shll $8, %ebx -; FASTISEL-NEXT: orl %r11d, %ebx +; FASTISEL-NEXT: orl %ecx, %ebx ; FASTISEL-NEXT: andl $1, %r14d ; FASTISEL-NEXT: shll $9, %r14d ; FASTISEL-NEXT: orl %ebx, %r14d @@ -1937,11 +1938,11 @@ ; FASTISEL-NEXT: andl $1, %edx ; FASTISEL-NEXT: shll $14, %edx ; FASTISEL-NEXT: orl %r13d, %edx -; FASTISEL-NEXT: andl $1, %esi ; FASTISEL-NEXT: shll $15, %esi -; FASTISEL-NEXT: orl %edx, %esi -; FASTISEL-NEXT: orl %ebp, %esi -; FASTISEL-NEXT: movw %si, (%rax) +; FASTISEL-NEXT: movzwl %si, %ecx +; FASTISEL-NEXT: orl %edx, %ecx +; FASTISEL-NEXT: orl %ebp, %ecx +; FASTISEL-NEXT: movw %cx, (%rax) ; FASTISEL-NEXT: popq %rbx ; FASTISEL-NEXT: popq %r12 ; FASTISEL-NEXT: popq %r13 diff --git a/llvm/test/CodeGen/X86/bitreverse.ll b/llvm/test/CodeGen/X86/bitreverse.ll --- a/llvm/test/CodeGen/X86/bitreverse.ll +++ b/llvm/test/CodeGen/X86/bitreverse.ll @@ -513,38 +513,38 @@ define i4 @test_bitreverse_i4(i4 %a) { ; X86-LABEL: test_bitreverse_i4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: andb $8, %al -; X86-NEXT: movl %ecx, %edx -; X86-NEXT: addb %cl, %dl -; X86-NEXT: andb $4, %dl -; X86-NEXT: movb %cl, %ah -; X86-NEXT: shlb $3, %ah -; X86-NEXT: andb $8, %ah -; X86-NEXT: orb %dl, %ah +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx +; X86-NEXT: addb %al, %cl +; X86-NEXT: andb $4, %cl +; X86-NEXT: movl %eax, %edx +; X86-NEXT: shlb $3, %dl +; X86-NEXT: andb $8, %dl +; X86-NEXT: orb %cl, %dl +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shrb %cl ; X86-NEXT: andb $2, %cl -; X86-NEXT: orb %ah, %cl +; X86-NEXT: orb %dl, %cl ; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $1, %al ; X86-NEXT: orb %cl, %al ; X86-NEXT: retl ; ; X64-LABEL: test_bitreverse_i4: ; X64: # %bb.0: ; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: leal (%rdi,%rdi), %eax +; X64-NEXT: andb $4, %al +; X64-NEXT: leal (,%rdi,8), %ecx +; X64-NEXT: andb $8, %cl +; X64-NEXT: orb %al, %cl ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $8, %al -; X64-NEXT: leal (%rdi,%rdi), %ecx -; X64-NEXT: andb $4, %cl -; X64-NEXT: leal (,%rdi,8), %edx -; X64-NEXT: andb $8, %dl -; X64-NEXT: orb %cl, %dl -; X64-NEXT: shrb %dil -; X64-NEXT: andb $2, %dil -; X64-NEXT: orb %dil, %dl -; X64-NEXT: shrb $3, %al -; X64-NEXT: orb %dl, %al +; X64-NEXT: shrb %al +; X64-NEXT: andb $2, %al +; X64-NEXT: orb %cl, %al +; X64-NEXT: shrb $3, %dil +; X64-NEXT: andb $1, %dil +; X64-NEXT: orb %dil, %al ; X64-NEXT: retq ; ; X86XOP-LABEL: test_bitreverse_i4: @@ -559,18 +559,18 @@ ; GFNI-LABEL: test_bitreverse_i4: ; GFNI: # %bb.0: ; GFNI-NEXT: # kill: def $edi killed $edi def $rdi +; GFNI-NEXT: leal (%rdi,%rdi), %eax +; GFNI-NEXT: andb $4, %al +; GFNI-NEXT: leal (,%rdi,8), %ecx +; GFNI-NEXT: andb $8, %cl +; GFNI-NEXT: orb %al, %cl ; GFNI-NEXT: movl %edi, %eax -; GFNI-NEXT: andb $8, %al -; GFNI-NEXT: leal (%rdi,%rdi), %ecx -; GFNI-NEXT: andb $4, %cl -; GFNI-NEXT: leal (,%rdi,8), %edx -; GFNI-NEXT: andb $8, %dl -; GFNI-NEXT: orb %cl, %dl -; GFNI-NEXT: shrb %dil -; GFNI-NEXT: andb $2, %dil -; GFNI-NEXT: orb %dil, %dl -; GFNI-NEXT: shrb $3, %al -; GFNI-NEXT: orb %dl, %al +; GFNI-NEXT: shrb %al +; GFNI-NEXT: andb $2, %al +; GFNI-NEXT: orb %cl, %al +; GFNI-NEXT: shrb $3, %dil +; GFNI-NEXT: andb $1, %dil +; GFNI-NEXT: orb %dil, %al ; GFNI-NEXT: retq %b = call i4 @llvm.bitreverse.i4(i4 %a) ret i4 %b diff --git a/llvm/test/CodeGen/X86/bmi-x86_64.ll b/llvm/test/CodeGen/X86/bmi-x86_64.ll --- 
a/llvm/test/CodeGen/X86/bmi-x86_64.ll +++ b/llvm/test/CodeGen/X86/bmi-x86_64.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BEXTR-SLOW,BMI1-SLOW -; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,BEXTR-SLOW,BMI2-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi | FileCheck %s --check-prefixes=CHECK,BMI1-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2 | FileCheck %s --check-prefixes=CHECK,BMI2-SLOW ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+bmi,+bmi2,+fast-bextr | FileCheck %s --check-prefixes=CHECK,BEXTR-FAST @@ -16,18 +16,11 @@ } define i64 @bextr64b(i64 %x) uwtable ssp { -; BEXTR-SLOW-LABEL: bextr64b: -; BEXTR-SLOW: # %bb.0: -; BEXTR-SLOW-NEXT: movq %rdi, %rax -; BEXTR-SLOW-NEXT: shrl $4, %eax -; BEXTR-SLOW-NEXT: andl $4095, %eax # imm = 0xFFF -; BEXTR-SLOW-NEXT: retq -; -; BEXTR-FAST-LABEL: bextr64b: -; BEXTR-FAST: # %bb.0: -; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04 -; BEXTR-FAST-NEXT: bextrl %eax, %edi, %eax -; BEXTR-FAST-NEXT: retq +; CHECK-LABEL: bextr64b: +; CHECK: # %bb.0: +; CHECK-NEXT: movzwl %di, %eax +; CHECK-NEXT: shrl $4, %eax +; CHECK-NEXT: retq %1 = lshr i64 %x, 4 %2 = and i64 %1, 4095 ret i64 %2 @@ -46,18 +39,11 @@ } define i64 @bextr64b_load(ptr %x) { -; BEXTR-SLOW-LABEL: bextr64b_load: -; BEXTR-SLOW: # %bb.0: -; BEXTR-SLOW-NEXT: movl (%rdi), %eax -; BEXTR-SLOW-NEXT: shrl $4, %eax -; BEXTR-SLOW-NEXT: andl $4095, %eax # imm = 0xFFF -; BEXTR-SLOW-NEXT: retq -; -; BEXTR-FAST-LABEL: bextr64b_load: -; BEXTR-FAST: # %bb.0: -; BEXTR-FAST-NEXT: movl $3076, %eax # imm = 0xC04 -; BEXTR-FAST-NEXT: bextrl %eax, (%rdi), %eax -; BEXTR-FAST-NEXT: retq +; CHECK-LABEL: bextr64b_load: +; CHECK: # %bb.0: +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: shrl $4, %eax +; CHECK-NEXT: retq %1 = load i64, ptr %x, align 8 %2 = lshr i64 %1, 4 %3 = and i64 %2, 4095 diff --git a/llvm/test/CodeGen/X86/bmi.ll b/llvm/test/CodeGen/X86/bmi.ll --- a/llvm/test/CodeGen/X86/bmi.ll +++ b/llvm/test/CodeGen/X86/bmi.ll @@ -364,32 +364,49 @@ } define i32 @bextr32b(i32 %x) uwtable ssp { -; X86-SLOW-BEXTR-LABEL: bextr32b: +; X86-LABEL: bextr32b: +; X86: # %bb.0: +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: shrl $4, %eax +; X86-NEXT: retl +; +; X64-LABEL: bextr32b: +; X64: # %bb.0: +; X64-NEXT: movzwl %di, %eax +; X64-NEXT: shrl $4, %eax +; X64-NEXT: retq + %1 = lshr i32 %x, 4 + %2 = and i32 %1, 4095 + ret i32 %2 +} + +define i32 @bextr32b_no_mov(i32 %x) uwtable ssp { +; X86-SLOW-BEXTR-LABEL: bextr32b_no_mov: ; X86-SLOW-BEXTR: # %bb.0: ; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-SLOW-BEXTR-NEXT: shrl $4, %eax +; X86-SLOW-BEXTR-NEXT: shrl $3, %eax ; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF ; X86-SLOW-BEXTR-NEXT: retl ; -; X64-SLOW-BEXTR-LABEL: bextr32b: +; X64-SLOW-BEXTR-LABEL: bextr32b_no_mov: ; X64-SLOW-BEXTR: # %bb.0: ; X64-SLOW-BEXTR-NEXT: movl %edi, %eax -; X64-SLOW-BEXTR-NEXT: shrl $4, %eax +; X64-SLOW-BEXTR-NEXT: shrl $3, %eax ; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF ; X64-SLOW-BEXTR-NEXT: retq ; -; X86-FAST-BEXTR-LABEL: bextr32b: +; X86-FAST-BEXTR-LABEL: bextr32b_no_mov: ; X86-FAST-BEXTR: # %bb.0: -; X86-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X86-FAST-BEXTR-NEXT: movl 
$3075, %eax # imm = 0xC03 ; X86-FAST-BEXTR-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-FAST-BEXTR-NEXT: retl ; -; X64-FAST-BEXTR-LABEL: bextr32b: +; X64-FAST-BEXTR-LABEL: bextr32b_no_mov: ; X64-FAST-BEXTR: # %bb.0: -; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X64-FAST-BEXTR-NEXT: movl $3075, %eax # imm = 0xC03 ; X64-FAST-BEXTR-NEXT: bextrl %eax, %edi, %eax ; X64-FAST-BEXTR-NEXT: retq - %1 = lshr i32 %x, 4 + %1 = lshr i32 %x, 3 %2 = and i32 %1, 4095 ret i32 %2 } @@ -412,35 +429,54 @@ } define i32 @bextr32b_load(ptr %x) uwtable ssp { -; X86-SLOW-BEXTR-LABEL: bextr32b_load: +; X86-LABEL: bextr32b_load: +; X86: # %bb.0: +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl (%eax), %eax +; X86-NEXT: shrl $4, %eax +; X86-NEXT: retl +; +; X64-LABEL: bextr32b_load: +; X64: # %bb.0: +; X64-NEXT: movzwl (%rdi), %eax +; X64-NEXT: shrl $4, %eax +; X64-NEXT: retq + %1 = load i32, ptr %x + %2 = lshr i32 %1, 4 + %3 = and i32 %2, 4095 + ret i32 %3 +} + +define i32 @bextr32_load_no_mov(ptr %x) uwtable ssp { +; X86-SLOW-BEXTR-LABEL: bextr32_load_no_mov: ; X86-SLOW-BEXTR: # %bb.0: ; X86-SLOW-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-SLOW-BEXTR-NEXT: movl (%eax), %eax -; X86-SLOW-BEXTR-NEXT: shrl $4, %eax +; X86-SLOW-BEXTR-NEXT: shrl $3, %eax ; X86-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF ; X86-SLOW-BEXTR-NEXT: retl ; -; X64-SLOW-BEXTR-LABEL: bextr32b_load: +; X64-SLOW-BEXTR-LABEL: bextr32_load_no_mov: ; X64-SLOW-BEXTR: # %bb.0: ; X64-SLOW-BEXTR-NEXT: movl (%rdi), %eax -; X64-SLOW-BEXTR-NEXT: shrl $4, %eax +; X64-SLOW-BEXTR-NEXT: shrl $3, %eax ; X64-SLOW-BEXTR-NEXT: andl $4095, %eax # imm = 0xFFF ; X64-SLOW-BEXTR-NEXT: retq ; -; X86-FAST-BEXTR-LABEL: bextr32b_load: +; X86-FAST-BEXTR-LABEL: bextr32_load_no_mov: ; X86-FAST-BEXTR: # %bb.0: ; X86-FAST-BEXTR-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-FAST-BEXTR-NEXT: movl $3076, %ecx # imm = 0xC04 +; X86-FAST-BEXTR-NEXT: movl $3075, %ecx # imm = 0xC03 ; X86-FAST-BEXTR-NEXT: bextrl %ecx, (%eax), %eax ; X86-FAST-BEXTR-NEXT: retl ; -; X64-FAST-BEXTR-LABEL: bextr32b_load: +; X64-FAST-BEXTR-LABEL: bextr32_load_no_mov: ; X64-FAST-BEXTR: # %bb.0: -; X64-FAST-BEXTR-NEXT: movl $3076, %eax # imm = 0xC04 +; X64-FAST-BEXTR-NEXT: movl $3075, %eax # imm = 0xC03 ; X64-FAST-BEXTR-NEXT: bextrl %eax, (%rdi), %eax ; X64-FAST-BEXTR-NEXT: retq %1 = load i32, ptr %x - %2 = lshr i32 %1, 4 + %2 = lshr i32 %1, 3 %3 = and i32 %2, 4095 ret i32 %3 } @@ -518,10 +554,10 @@ ; X86-LABEL: blsi32_z: ; X86: # %bb.0: ; X86-NEXT: blsil {{[0-9]+}}(%esp), %eax -; X86-NEXT: jne .LBB25_2 +; X86-NEXT: jne .LBB27_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: .LBB25_2: +; X86-NEXT: .LBB27_2: ; X86-NEXT: retl ; ; X64-LABEL: blsi32_z: @@ -625,11 +661,11 @@ ; X86-NEXT: andl %ecx, %eax ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: orl %edx, %ecx -; X86-NEXT: jne .LBB29_2 +; X86-NEXT: jne .LBB31_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: .LBB29_2: +; X86-NEXT: .LBB31_2: ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -750,10 +786,10 @@ ; X86-LABEL: blsmsk32_z: ; X86: # %bb.0: ; X86-NEXT: blsmskl {{[0-9]+}}(%esp), %eax -; X86-NEXT: jne .LBB34_2 +; X86-NEXT: jne .LBB36_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: .LBB34_2: +; X86-NEXT: .LBB36_2: ; X86-NEXT: retl ; ; X64-LABEL: blsmsk32_z: @@ -855,11 +891,11 @@ ; X86-NEXT: xorl %esi, %edx ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: orl %edx, %ecx -; X86-NEXT: jne .LBB38_2 +; X86-NEXT: jne .LBB40_2 ; X86-NEXT: # 
%bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: .LBB38_2: +; X86-NEXT: .LBB40_2: ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -980,10 +1016,10 @@ ; X86-LABEL: blsr32_z: ; X86: # %bb.0: ; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax -; X86-NEXT: jne .LBB43_2 +; X86-NEXT: jne .LBB45_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: .LBB43_2: +; X86-NEXT: .LBB45_2: ; X86-NEXT: retl ; ; X64-LABEL: blsr32_z: @@ -1085,11 +1121,11 @@ ; X86-NEXT: andl %esi, %edx ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: orl %edx, %ecx -; X86-NEXT: jne .LBB47_2 +; X86-NEXT: jne .LBB49_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NEXT: .LBB47_2: +; X86-NEXT: .LBB49_2: ; X86-NEXT: popl %esi ; X86-NEXT: retl ; @@ -1223,20 +1259,20 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: bextrl %eax, {{[0-9]+}}(%esp), %eax ; X86-NEXT: testl %eax, %eax -; X86-NEXT: js .LBB52_1 +; X86-NEXT: js .LBB54_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: jmp bar # TAILCALL -; X86-NEXT: .LBB52_1: +; X86-NEXT: .LBB54_1: ; X86-NEXT: retl ; ; X64-LABEL: pr40060: ; X64: # %bb.0: ; X64-NEXT: bextrl %esi, %edi, %eax ; X64-NEXT: testl %eax, %eax -; X64-NEXT: js .LBB52_1 +; X64-NEXT: js .LBB54_1 ; X64-NEXT: # %bb.2: ; X64-NEXT: jmp bar # TAILCALL -; X64-NEXT: .LBB52_1: +; X64-NEXT: .LBB54_1: ; X64-NEXT: retq %3 = tail call i32 @llvm.x86.bmi.bextr.32(i32 %0, i32 %1) %4 = icmp sgt i32 %3, -1 @@ -1255,10 +1291,10 @@ ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: blsrl {{[0-9]+}}(%esp), %esi -; X86-NEXT: jne .LBB53_2 +; X86-NEXT: jne .LBB55_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: calll bar -; X86-NEXT: .LBB53_2: +; X86-NEXT: .LBB55_2: ; X86-NEXT: movl %esi, %eax ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 @@ -1270,10 +1306,10 @@ ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: .cfi_offset %rbx, -16 ; X64-NEXT: blsrl %edi, %ebx -; X64-NEXT: jne .LBB53_2 +; X64-NEXT: jne .LBB55_2 ; X64-NEXT: # %bb.1: ; X64-NEXT: callq bar -; X64-NEXT: .LBB53_2: +; X64-NEXT: .LBB55_2: ; X64-NEXT: movl %ebx, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: .cfi_def_cfa_offset 8 @@ -1307,10 +1343,10 @@ ; X86-NEXT: andl %ecx, %edi ; X86-NEXT: movl %esi, %eax ; X86-NEXT: orl %edi, %eax -; X86-NEXT: jne .LBB54_2 +; X86-NEXT: jne .LBB56_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: calll bar -; X86-NEXT: .LBB54_2: +; X86-NEXT: .LBB56_2: ; X86-NEXT: movl %esi, %eax ; X86-NEXT: movl %edi, %edx ; X86-NEXT: popl %esi @@ -1325,10 +1361,10 @@ ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: .cfi_offset %rbx, -16 ; X64-NEXT: blsrq %rdi, %rbx -; X64-NEXT: jne .LBB54_2 +; X64-NEXT: jne .LBB56_2 ; X64-NEXT: # %bb.1: ; X64-NEXT: callq bar -; X64-NEXT: .LBB54_2: +; X64-NEXT: .LBB56_2: ; X64-NEXT: movq %rbx, %rax ; X64-NEXT: popq %rbx ; X64-NEXT: .cfi_def_cfa_offset 8 @@ -1350,10 +1386,10 @@ ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: .cfi_offset %esi, -8 ; X86-NEXT: blsil {{[0-9]+}}(%esp), %esi -; X86-NEXT: jne .LBB55_2 +; X86-NEXT: jne .LBB57_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: calll bar -; X86-NEXT: .LBB55_2: +; X86-NEXT: .LBB57_2: ; X86-NEXT: movl %esi, %eax ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 @@ -1365,10 +1401,10 @@ ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: .cfi_offset %rbx, -16 ; X64-NEXT: blsil %edi, %ebx -; X64-NEXT: jne .LBB55_2 +; X64-NEXT: jne .LBB57_2 ; X64-NEXT: # %bb.1: ; X64-NEXT: callq bar -; X64-NEXT: .LBB55_2: +; X64-NEXT: .LBB57_2: ; X64-NEXT: movl %ebx, %eax ; X64-NEXT: popq %rbx ; X64-NEXT: 
.cfi_def_cfa_offset 8 @@ -1402,10 +1438,10 @@ ; X86-NEXT: andl %eax, %edi ; X86-NEXT: movl %edi, %eax ; X86-NEXT: orl %esi, %eax -; X86-NEXT: jne .LBB56_2 +; X86-NEXT: jne .LBB58_2 ; X86-NEXT: # %bb.1: ; X86-NEXT: calll bar -; X86-NEXT: .LBB56_2: +; X86-NEXT: .LBB58_2: ; X86-NEXT: movl %edi, %eax ; X86-NEXT: movl %esi, %edx ; X86-NEXT: popl %esi @@ -1420,10 +1456,10 @@ ; X64-NEXT: .cfi_def_cfa_offset 16 ; X64-NEXT: .cfi_offset %rbx, -16 ; X64-NEXT: blsiq %rdi, %rbx -; X64-NEXT: jne .LBB56_2 +; X64-NEXT: jne .LBB58_2 ; X64-NEXT: # %bb.1: ; X64-NEXT: callq bar -; X64-NEXT: .LBB56_2: +; X64-NEXT: .LBB58_2: ; X64-NEXT: movq %rbx, %rax ; X64-NEXT: popq %rbx ; X64-NEXT: .cfi_def_cfa_offset 8 @@ -1444,19 +1480,19 @@ ; X86-LABEL: pr42118_i32: ; X86: # %bb.0: ; X86-NEXT: blsrl {{[0-9]+}}(%esp), %eax -; X86-NEXT: jne .LBB57_1 +; X86-NEXT: jne .LBB59_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: jmp bar # TAILCALL -; X86-NEXT: .LBB57_1: +; X86-NEXT: .LBB59_1: ; X86-NEXT: retl ; ; X64-LABEL: pr42118_i32: ; X64: # %bb.0: ; X64-NEXT: blsrl %edi, %eax -; X64-NEXT: jne .LBB57_1 +; X64-NEXT: jne .LBB59_1 ; X64-NEXT: # %bb.2: ; X64-NEXT: jmp bar # TAILCALL -; X64-NEXT: .LBB57_1: +; X64-NEXT: .LBB59_1: ; X64-NEXT: retq %tmp = sub i32 0, %x %tmp1 = and i32 %tmp, %x @@ -1484,12 +1520,12 @@ ; X86-NEXT: andl %eax, %edx ; X86-NEXT: andl %ecx, %esi ; X86-NEXT: orl %edx, %esi -; X86-NEXT: jne .LBB58_1 +; X86-NEXT: jne .LBB60_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 ; X86-NEXT: jmp bar # TAILCALL -; X86-NEXT: .LBB58_1: +; X86-NEXT: .LBB60_1: ; X86-NEXT: .cfi_def_cfa_offset 8 ; X86-NEXT: popl %esi ; X86-NEXT: .cfi_def_cfa_offset 4 @@ -1498,10 +1534,10 @@ ; X64-LABEL: pr42118_i64: ; X64: # %bb.0: ; X64-NEXT: blsrq %rdi, %rax -; X64-NEXT: jne .LBB58_1 +; X64-NEXT: jne .LBB60_1 ; X64-NEXT: # %bb.2: ; X64-NEXT: jmp bar # TAILCALL -; X64-NEXT: .LBB58_1: +; X64-NEXT: .LBB60_1: ; X64-NEXT: retq %tmp = sub i64 0, %x %tmp1 = and i64 %tmp, %x @@ -1519,11 +1555,11 @@ ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: testl %eax, %eax -; X86-NEXT: jne .LBB59_1 +; X86-NEXT: jne .LBB61_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: retl -; X86-NEXT: .LBB59_1: +; X86-NEXT: .LBB61_1: ; X86-NEXT: blsil %eax, %eax ; X86-NEXT: retl ; @@ -1552,15 +1588,15 @@ ; X86-NEXT: sbbl %esi, %edx ; X86-NEXT: movl %ecx, %edi ; X86-NEXT: orl %esi, %edi -; X86-NEXT: jne .LBB60_1 +; X86-NEXT: jne .LBB62_1 ; X86-NEXT: # %bb.2: ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: jmp .LBB60_3 -; X86-NEXT: .LBB60_1: +; X86-NEXT: jmp .LBB62_3 +; X86-NEXT: .LBB62_1: ; X86-NEXT: andl %esi, %edx ; X86-NEXT: andl %ecx, %eax -; X86-NEXT: .LBB60_3: +; X86-NEXT: .LBB62_3: ; X86-NEXT: popl %esi ; X86-NEXT: popl %edi ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/combine-bitreverse.ll b/llvm/test/CodeGen/X86/combine-bitreverse.ll --- a/llvm/test/CodeGen/X86/combine-bitreverse.ll +++ b/llvm/test/CodeGen/X86/combine-bitreverse.ll @@ -236,8 +236,8 @@ ; X86-NEXT: leal (%eax,%ecx,4), %ecx ; X86-NEXT: movl %ecx, %eax ; X86-NEXT: andl $5592405, %eax # imm = 0x555555 +; X86-NEXT: andl $44739242, %ecx # imm = 0x2AAAAAA ; X86-NEXT: shll $6, %ecx -; X86-NEXT: andl $-1431655808, %ecx # imm = 0xAAAAAA80 ; X86-NEXT: shll $8, %eax ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: bswapl %eax @@ -276,8 +276,8 @@ ; X64-NEXT: leal (%rdi,%rax,4), %eax ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: andl $5592405, %ecx # imm = 0x555555 +; X64-NEXT: andl $44739242, %eax # imm = 0x2AAAAAA 
; X64-NEXT: shll $6, %eax -; X64-NEXT: andl $-1431655808, %eax # imm = 0xAAAAAA80 ; X64-NEXT: shll $8, %ecx ; X64-NEXT: orl %eax, %ecx ; X64-NEXT: bswapl %ecx diff --git a/llvm/test/CodeGen/X86/combine-rotates.ll b/llvm/test/CodeGen/X86/combine-rotates.ll --- a/llvm/test/CodeGen/X86/combine-rotates.ll +++ b/llvm/test/CodeGen/X86/combine-rotates.ll @@ -440,11 +440,11 @@ ; CHECK-LABEL: rotl_merge_i5: ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi -; CHECK-NEXT: leal (,%rdi,4), %ecx -; CHECK-NEXT: movl %edi, %eax -; CHECK-NEXT: andb $24, %al -; CHECK-NEXT: shrb $3, %al -; CHECK-NEXT: orb %cl, %al +; CHECK-NEXT: leal (,%rdi,4), %eax +; CHECK-NEXT: shrb $3, %dil +; CHECK-NEXT: andb $3, %dil +; CHECK-NEXT: orb %dil, %al +; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %r1 = call i5 @llvm.fshl.i5(i5 %x, i5 %x, i5 -1) %r2 = call i5 @llvm.fshl.i5(i5 %r1, i5 %r1, i5 1) diff --git a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll --- a/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll +++ b/llvm/test/CodeGen/X86/const-shift-of-constmasked.ll @@ -1,9 +1,10 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp ; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X86 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64 ; The mask is all-ones, potentially shifted. - +; NB: Update with `--no_x86_scrub_sp` as for i686 the loads and shift +; can fold making the exact SP value relevant to correctness. ;------------------------------------------------------------------------------; ; 8-bit ;------------------------------------------------------------------------------; @@ -13,16 +14,16 @@ define i8 @test_i8_7_mask_lshr_1(i8 %a0) { ; X86-LABEL: test_i8_7_mask_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $6, %al +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shrb %al +; X86-NEXT: andb $3, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_7_mask_lshr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $6, %al ; X64-NEXT: shrb %al +; X64-NEXT: andb $3, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 7 @@ -33,16 +34,16 @@ define i8 @test_i8_28_mask_lshr_1(i8 %a0) { ; X86-LABEL: test_i8_28_mask_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $28, %al +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shrb %al +; X86-NEXT: andb $14, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_lshr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $28, %al ; X64-NEXT: shrb %al +; X64-NEXT: andb $14, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -52,16 +53,16 @@ define i8 @test_i8_28_mask_lshr_2(i8 %a0) { ; X86-LABEL: test_i8_28_mask_lshr_2: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $28, %al +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shrb $2, %al +; X86-NEXT: andb $7, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_lshr_2: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $28, %al ; X64-NEXT: shrb $2, %al +; X64-NEXT: andb $7, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -71,16 +72,16 @@ define i8 @test_i8_28_mask_lshr_3(i8 %a0) { ; X86-LABEL: 
test_i8_28_mask_lshr_3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $24, %al +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $3, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_lshr_3: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $24, %al ; X64-NEXT: shrb $3, %al +; X64-NEXT: andb $3, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -90,16 +91,16 @@ define i8 @test_i8_28_mask_lshr_4(i8 %a0) { ; X86-LABEL: test_i8_28_mask_lshr_4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $16, %al +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shrb $4, %al +; X86-NEXT: andb $1, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_lshr_4: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $16, %al ; X64-NEXT: shrb $4, %al +; X64-NEXT: andb $1, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -110,7 +111,7 @@ define i8 @test_i8_224_mask_lshr_1(i8 %a0) { ; X86-LABEL: test_i8_224_mask_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: andb $-32, %al ; X86-NEXT: shrb %al ; X86-NEXT: retl @@ -129,16 +130,16 @@ define i8 @test_i8_224_mask_lshr_4(i8 %a0) { ; X86-LABEL: test_i8_224_mask_lshr_4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $-32, %al +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shrb $4, %al +; X86-NEXT: andb $14, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_224_mask_lshr_4: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $-32, %al ; X64-NEXT: shrb $4, %al +; X64-NEXT: andb $14, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 224 @@ -148,7 +149,7 @@ define i8 @test_i8_224_mask_lshr_5(i8 %a0) { ; X86-LABEL: test_i8_224_mask_lshr_5: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shrb $5, %al ; X86-NEXT: retl ; @@ -165,7 +166,7 @@ define i8 @test_i8_224_mask_lshr_6(i8 %a0) { ; X86-LABEL: test_i8_224_mask_lshr_6: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shrb $6, %al ; X86-NEXT: retl ; @@ -185,16 +186,16 @@ define i8 @test_i8_7_mask_ashr_1(i8 %a0) { ; X86-LABEL: test_i8_7_mask_ashr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $6, %al +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shrb %al +; X86-NEXT: andb $3, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_7_mask_ashr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $6, %al ; X64-NEXT: shrb %al +; X64-NEXT: andb $3, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 7 @@ -205,16 +206,16 @@ define i8 @test_i8_28_mask_ashr_1(i8 %a0) { ; X86-LABEL: test_i8_28_mask_ashr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $28, %al +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shrb %al +; X86-NEXT: andb $14, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_ashr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $28, %al ; X64-NEXT: shrb %al +; X64-NEXT: andb $14, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -224,16 +225,16 @@ define i8 @test_i8_28_mask_ashr_2(i8 %a0) { ; X86-LABEL: test_i8_28_mask_ashr_2: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $28, %al +; X86-NEXT: 
movzbl 4(%esp), %eax ; X86-NEXT: shrb $2, %al +; X86-NEXT: andb $7, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_ashr_2: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $28, %al ; X64-NEXT: shrb $2, %al +; X64-NEXT: andb $7, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -243,16 +244,16 @@ define i8 @test_i8_28_mask_ashr_3(i8 %a0) { ; X86-LABEL: test_i8_28_mask_ashr_3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $24, %al +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shrb $3, %al +; X86-NEXT: andb $3, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_ashr_3: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $24, %al ; X64-NEXT: shrb $3, %al +; X64-NEXT: andb $3, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -262,16 +263,16 @@ define i8 @test_i8_28_mask_ashr_4(i8 %a0) { ; X86-LABEL: test_i8_28_mask_ashr_4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $16, %al +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shrb $4, %al +; X86-NEXT: andb $1, %al ; X86-NEXT: retl ; ; X64-LABEL: test_i8_28_mask_ashr_4: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andb $16, %al ; X64-NEXT: shrb $4, %al +; X64-NEXT: andb $1, %al ; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = and i8 %a0, 28 @@ -282,7 +283,7 @@ define i8 @test_i8_224_mask_ashr_1(i8 %a0) { ; X86-LABEL: test_i8_224_mask_ashr_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: andb $-32, %al ; X86-NEXT: sarb %al ; X86-NEXT: retl @@ -301,7 +302,7 @@ define i8 @test_i8_224_mask_ashr_4(i8 %a0) { ; X86-LABEL: test_i8_224_mask_ashr_4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: andb $-32, %al ; X86-NEXT: sarb $4, %al ; X86-NEXT: retl @@ -320,7 +321,7 @@ define i8 @test_i8_224_mask_ashr_5(i8 %a0) { ; X86-LABEL: test_i8_224_mask_ashr_5: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: sarb $5, %al ; X86-NEXT: retl ; @@ -337,7 +338,7 @@ define i8 @test_i8_224_mask_ashr_6(i8 %a0) { ; X86-LABEL: test_i8_224_mask_ashr_6: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: sarb $6, %al ; X86-NEXT: retl ; @@ -357,7 +358,7 @@ define i8 @test_i8_7_mask_shl_1(i8 %a0) { ; X86-LABEL: test_i8_7_mask_shl_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: andb $7, %al ; X86-NEXT: addb %al, %al ; X86-NEXT: retl @@ -376,7 +377,7 @@ define i8 @test_i8_7_mask_shl_4(i8 %a0) { ; X86-LABEL: test_i8_7_mask_shl_4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: andb $7, %al ; X86-NEXT: shlb $4, %al ; X86-NEXT: retl @@ -395,7 +396,7 @@ define i8 @test_i8_7_mask_shl_5(i8 %a0) { ; X86-LABEL: test_i8_7_mask_shl_5: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shlb $5, %al ; X86-NEXT: retl ; @@ -412,7 +413,7 @@ define i8 @test_i8_7_mask_shl_6(i8 %a0) { ; X86-LABEL: test_i8_7_mask_shl_6: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: shlb $6, %al ; X86-NEXT: retl ; @@ -430,7 +431,7 @@ define i8 @test_i8_28_mask_shl_1(i8 %a0) { ; X86-LABEL: test_i8_28_mask_shl_1: ; X86: # %bb.0: -; X86-NEXT: movzbl 
{{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: andb $28, %al ; X86-NEXT: addb %al, %al ; X86-NEXT: retl @@ -449,7 +450,7 @@ define i8 @test_i8_28_mask_shl_2(i8 %a0) { ; X86-LABEL: test_i8_28_mask_shl_2: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: andb $28, %al ; X86-NEXT: shlb $2, %al ; X86-NEXT: retl @@ -468,7 +469,7 @@ define i8 @test_i8_28_mask_shl_3(i8 %a0) { ; X86-LABEL: test_i8_28_mask_shl_3: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: andb $28, %al ; X86-NEXT: shlb $3, %al ; X86-NEXT: retl @@ -487,7 +488,7 @@ define i8 @test_i8_28_mask_shl_4(i8 %a0) { ; X86-LABEL: test_i8_28_mask_shl_4: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: andb $12, %al ; X86-NEXT: shlb $4, %al ; X86-NEXT: retl @@ -507,7 +508,7 @@ define i8 @test_i8_224_mask_shl_1(i8 %a0) { ; X86-LABEL: test_i8_224_mask_shl_1: ; X86: # %bb.0: -; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzbl 4(%esp), %eax ; X86-NEXT: andb $96, %al ; X86-NEXT: addb %al, %al ; X86-NEXT: retl @@ -533,17 +534,17 @@ define i16 @test_i16_127_mask_lshr_1(i16 %a0) { ; X86-LABEL: test_i16_127_mask_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $126, %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl %eax +; X86-NEXT: andl $63, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_127_mask_lshr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $126, %eax ; X64-NEXT: shrl %eax +; X64-NEXT: andl $63, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 127 @@ -554,17 +555,17 @@ define i16 @test_i16_2032_mask_lshr_3(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_lshr_3: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $2032, %eax # imm = 0x7F0 +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $3, %eax +; X86-NEXT: andl $254, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_lshr_3: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $2032, %eax # imm = 0x7F0 ; X64-NEXT: shrl $3, %eax +; X64-NEXT: andl $254, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 2032 @@ -574,7 +575,7 @@ define i16 @test_i16_2032_mask_lshr_4(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_lshr_4: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $4, %eax ; X86-NEXT: andl $127, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -594,7 +595,7 @@ define i16 @test_i16_2032_mask_lshr_5(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_lshr_5: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $5, %eax ; X86-NEXT: andl $63, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -614,7 +615,7 @@ define i16 @test_i16_2032_mask_lshr_6(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_lshr_6: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $6, %eax ; X86-NEXT: andl $31, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -635,37 +636,37 @@ define i16 @test_i16_65024_mask_lshr_1(i16 %a0) { ; X86-LABEL: test_i16_65024_mask_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $65024, %eax # imm = 0xFE00 +; X86-NEXT: 
movzwl 4(%esp), %eax ; X86-NEXT: shrl %eax +; X86-NEXT: andl $32512, %eax # imm = 0x7F00 ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_65024_mask_lshr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $65024, %eax # imm = 0xFE00 ; X64-NEXT: shrl %eax +; X64-NEXT: andl $32512, %eax # imm = 0x7F00 ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 65024 %t1 = lshr i16 %t0, 1 ret i16 %t1 } + define i16 @test_i16_65024_mask_lshr_8(i16 %a0) { ; X86-LABEL: test_i16_65024_mask_lshr_8: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $65024, %eax # imm = 0xFE00 -; X86-NEXT: shrl $8, %eax +; X86-NEXT: movzbl 5(%esp), %eax +; X86-NEXT: andl $-2, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_65024_mask_lshr_8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $65024, %eax # imm = 0xFE00 ; X64-NEXT: shrl $8, %eax +; X64-NEXT: andl $254, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 65024 @@ -675,7 +676,7 @@ define i16 @test_i16_65024_mask_lshr_9(i16 %a0) { ; X86-LABEL: test_i16_65024_mask_lshr_9: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl 4(%esp), %eax ; X86-NEXT: shrl $9, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl @@ -693,7 +694,7 @@ define i16 @test_i16_65024_mask_lshr_10(i16 %a0) { ; X86-LABEL: test_i16_65024_mask_lshr_10: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl 4(%esp), %eax ; X86-NEXT: shrl $10, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl @@ -714,17 +715,17 @@ define i16 @test_i16_127_mask_ashr_1(i16 %a0) { ; X86-LABEL: test_i16_127_mask_ashr_1: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $126, %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl %eax +; X86-NEXT: andl $63, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_127_mask_ashr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $126, %eax ; X64-NEXT: shrl %eax +; X64-NEXT: andl $63, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 127 @@ -735,17 +736,17 @@ define i16 @test_i16_2032_mask_ashr_3(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_ashr_3: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $2032, %eax # imm = 0x7F0 +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $3, %eax +; X86-NEXT: andl $254, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_2032_mask_ashr_3: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $2032, %eax # imm = 0x7F0 ; X64-NEXT: shrl $3, %eax +; X64-NEXT: andl $254, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 2032 @@ -755,7 +756,7 @@ define i16 @test_i16_2032_mask_ashr_4(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_ashr_4: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $4, %eax ; X86-NEXT: andl $127, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -775,7 +776,7 @@ define i16 @test_i16_2032_mask_ashr_5(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_ashr_5: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $5, %eax ; X86-NEXT: andl $63, %eax ; X86-NEXT: # kill: def $ax 
killed $ax killed $eax @@ -795,7 +796,7 @@ define i16 @test_i16_2032_mask_ashr_6(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_ashr_6: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $6, %eax ; X86-NEXT: andl $31, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -816,7 +817,7 @@ define i16 @test_i16_65024_mask_ashr_1(i16 %a0) { ; X86-LABEL: test_i16_65024_mask_ashr_1: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl 4(%esp), %eax ; X86-NEXT: andl $65024, %eax # imm = 0xFE00 ; X86-NEXT: cwtl ; X86-NEXT: shrl %eax @@ -837,7 +838,7 @@ define i16 @test_i16_65024_mask_ashr_8(i16 %a0) { ; X86-LABEL: test_i16_65024_mask_ashr_8: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl 4(%esp), %eax ; X86-NEXT: andl $65024, %eax # imm = 0xFE00 ; X86-NEXT: cwtl ; X86-NEXT: shrl $8, %eax @@ -858,7 +859,7 @@ define i16 @test_i16_65024_mask_ashr_9(i16 %a0) { ; X86-LABEL: test_i16_65024_mask_ashr_9: ; X86: # %bb.0: -; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movswl 4(%esp), %eax ; X86-NEXT: shrl $9, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl @@ -876,7 +877,7 @@ define i16 @test_i16_65024_mask_ashr_10(i16 %a0) { ; X86-LABEL: test_i16_65024_mask_ashr_10: ; X86: # %bb.0: -; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movswl 4(%esp), %eax ; X86-NEXT: shrl $10, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl @@ -897,17 +898,16 @@ define i16 @test_i16_127_mask_shl_1(i16 %a0) { ; X86-LABEL: test_i16_127_mask_shl_1: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andl $127, %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: addl %eax, %eax +; X86-NEXT: movzbl %al, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_127_mask_shl_1: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: andl $127, %edi -; X64-NEXT: leal (%rdi,%rdi), %eax +; X64-NEXT: addl %edi, %edi +; X64-NEXT: movzbl %dil, %eax ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %t0 = and i16 %a0, 127 @@ -917,7 +917,7 @@ define i16 @test_i16_127_mask_shl_8(i16 %a0) { ; X86-LABEL: test_i16_127_mask_shl_8: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl 4(%esp), %eax ; X86-NEXT: andl $127, %eax ; X86-NEXT: shll $8, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -937,7 +937,7 @@ define i16 @test_i16_127_mask_shl_9(i16 %a0) { ; X86-LABEL: test_i16_127_mask_shl_9: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shll $9, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl @@ -955,7 +955,7 @@ define i16 @test_i16_127_mask_shl_10(i16 %a0) { ; X86-LABEL: test_i16_127_mask_shl_10: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shll $10, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl @@ -974,7 +974,7 @@ define i16 @test_i16_2032_mask_shl_3(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_shl_3: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl 4(%esp), %eax ; X86-NEXT: andl $2032, %eax # imm = 0x7F0 ; X86-NEXT: shll $3, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -994,7 +994,7 @@ define i16 @test_i16_2032_mask_shl_4(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_shl_4: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; 
X86-NEXT: movzwl 4(%esp), %eax ; X86-NEXT: andl $2032, %eax # imm = 0x7F0 ; X86-NEXT: shll $4, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -1014,7 +1014,7 @@ define i16 @test_i16_2032_mask_shl_5(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_shl_5: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl 4(%esp), %eax ; X86-NEXT: andl $2032, %eax # imm = 0x7F0 ; X86-NEXT: shll $5, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -1034,7 +1034,7 @@ define i16 @test_i16_2032_mask_shl_6(i16 %a0) { ; X86-LABEL: test_i16_2032_mask_shl_6: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl 4(%esp), %eax ; X86-NEXT: andl $1008, %eax # imm = 0x3F0 ; X86-NEXT: shll $6, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -1055,7 +1055,7 @@ define i16 @test_i16_65024_mask_shl_1(i16 %a0) { ; X86-LABEL: test_i16_65024_mask_shl_1: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl 4(%esp), %eax ; X86-NEXT: andl $32256, %eax # imm = 0x7E00 ; X86-NEXT: addl %eax, %eax ; X86-NEXT: # kill: def $ax killed $ax killed $eax @@ -1082,16 +1082,16 @@ define i32 @test_i32_32767_mask_lshr_1(i32 %a0) { ; X86-LABEL: test_i32_32767_mask_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movl $32766, %eax # imm = 0x7FFE -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl %eax +; X86-NEXT: andl $16383, %eax # imm = 0x3FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_32767_mask_lshr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $32766, %eax # imm = 0x7FFE ; X64-NEXT: shrl %eax +; X64-NEXT: andl $16383, %eax # imm = 0x3FFF ; X64-NEXT: retq %t0 = and i32 %a0, 32767 %t1 = lshr i32 %t0, 1 @@ -1101,16 +1101,16 @@ define i32 @test_i32_8388352_mask_lshr_7(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_lshr_7: ; X86: # %bb.0: -; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $7, %eax +; X86-NEXT: andl $65534, %eax # imm = 0xFFFE ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_lshr_7: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8388352, %eax # imm = 0x7FFF00 ; X64-NEXT: shrl $7, %eax +; X64-NEXT: andl $65534, %eax # imm = 0xFFFE ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = lshr i32 %t0, 7 @@ -1119,16 +1119,16 @@ define i32 @test_i32_8388352_mask_lshr_8(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_lshr_8: ; X86: # %bb.0: -; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $8, %eax +; X86-NEXT: andl $32767, %eax # imm = 0x7FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_lshr_8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8388352, %eax # imm = 0x7FFF00 ; X64-NEXT: shrl $8, %eax +; X64-NEXT: andl $32767, %eax # imm = 0x7FFF ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = lshr i32 %t0, 8 @@ -1137,16 +1137,16 @@ define i32 @test_i32_8388352_mask_lshr_9(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_lshr_9: ; X86: # %bb.0: -; X86-NEXT: movl $8388096, %eax # imm = 0x7FFE00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $9, %eax +; X86-NEXT: andl $16383, %eax # imm = 0x3FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_lshr_9: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8388096, %eax # imm = 0x7FFE00 ; X64-NEXT: shrl $9, %eax +; X64-NEXT: andl $16383, %eax # imm = 0x3FFF ; X64-NEXT: retq %t0 = 
and i32 %a0, 8388352 %t1 = lshr i32 %t0, 9 @@ -1155,16 +1155,16 @@ define i32 @test_i32_8388352_mask_lshr_10(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_lshr_10: ; X86: # %bb.0: -; X86-NEXT: movl $8387584, %eax # imm = 0x7FFC00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $10, %eax +; X86-NEXT: andl $8191, %eax # imm = 0x1FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_lshr_10: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8387584, %eax # imm = 0x7FFC00 ; X64-NEXT: shrl $10, %eax +; X64-NEXT: andl $8191, %eax # imm = 0x1FFF ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = lshr i32 %t0, 10 @@ -1175,7 +1175,7 @@ ; X86-LABEL: test_i32_4294836224_mask_lshr_1: ; X86: # %bb.0: ; X86-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 4(%esp), %eax ; X86-NEXT: shrl %eax ; X86-NEXT: retl ; @@ -1189,19 +1189,20 @@ %t1 = lshr i32 %t0, 1 ret i32 %t1 } + + define i32 @test_i32_4294836224_mask_lshr_16(i32 %a0) { ; X86-LABEL: test_i32_4294836224_mask_lshr_16: ; X86: # %bb.0: -; X86-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-NEXT: shrl $16, %eax +; X86-NEXT: movzwl 6(%esp), %eax +; X86-NEXT: andl $-2, %eax ; X86-NEXT: retl ; ; X64-LABEL: test_i32_4294836224_mask_lshr_16: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $-131072, %eax # imm = 0xFFFE0000 ; X64-NEXT: shrl $16, %eax +; X64-NEXT: andl $-2, %eax ; X64-NEXT: retq %t0 = and i32 %a0, 4294836224 %t1 = lshr i32 %t0, 16 @@ -1210,7 +1211,7 @@ define i32 @test_i32_4294836224_mask_lshr_17(i32 %a0) { ; X86-LABEL: test_i32_4294836224_mask_lshr_17: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $17, %eax ; X86-NEXT: retl ; @@ -1226,7 +1227,7 @@ define i32 @test_i32_4294836224_mask_lshr_18(i32 %a0) { ; X86-LABEL: test_i32_4294836224_mask_lshr_18: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $18, %eax ; X86-NEXT: retl ; @@ -1245,16 +1246,16 @@ define i32 @test_i32_32767_mask_ashr_1(i32 %a0) { ; X86-LABEL: test_i32_32767_mask_ashr_1: ; X86: # %bb.0: -; X86-NEXT: movl $32766, %eax # imm = 0x7FFE -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl %eax +; X86-NEXT: andl $16383, %eax # imm = 0x3FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_32767_mask_ashr_1: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $32766, %eax # imm = 0x7FFE ; X64-NEXT: shrl %eax +; X64-NEXT: andl $16383, %eax # imm = 0x3FFF ; X64-NEXT: retq %t0 = and i32 %a0, 32767 %t1 = ashr i32 %t0, 1 @@ -1264,16 +1265,16 @@ define i32 @test_i32_8388352_mask_ashr_7(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_ashr_7: ; X86: # %bb.0: -; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $7, %eax +; X86-NEXT: andl $65534, %eax # imm = 0xFFFE ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_ashr_7: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8388352, %eax # imm = 0x7FFF00 ; X64-NEXT: shrl $7, %eax +; X64-NEXT: andl $65534, %eax # imm = 0xFFFE ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = ashr i32 %t0, 7 @@ -1282,16 +1283,16 @@ define i32 @test_i32_8388352_mask_ashr_8(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_ashr_8: ; X86: # %bb.0: -; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; 
X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $8, %eax +; X86-NEXT: andl $32767, %eax # imm = 0x7FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_ashr_8: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8388352, %eax # imm = 0x7FFF00 ; X64-NEXT: shrl $8, %eax +; X64-NEXT: andl $32767, %eax # imm = 0x7FFF ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = ashr i32 %t0, 8 @@ -1300,16 +1301,16 @@ define i32 @test_i32_8388352_mask_ashr_9(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_ashr_9: ; X86: # %bb.0: -; X86-NEXT: movl $8388096, %eax # imm = 0x7FFE00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $9, %eax +; X86-NEXT: andl $16383, %eax # imm = 0x3FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_ashr_9: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8388096, %eax # imm = 0x7FFE00 ; X64-NEXT: shrl $9, %eax +; X64-NEXT: andl $16383, %eax # imm = 0x3FFF ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = ashr i32 %t0, 9 @@ -1318,16 +1319,16 @@ define i32 @test_i32_8388352_mask_ashr_10(i32 %a0) { ; X86-LABEL: test_i32_8388352_mask_ashr_10: ; X86: # %bb.0: -; X86-NEXT: movl $8387584, %eax # imm = 0x7FFC00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $10, %eax +; X86-NEXT: andl $8191, %eax # imm = 0x1FFF ; X86-NEXT: retl ; ; X64-LABEL: test_i32_8388352_mask_ashr_10: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: andl $8387584, %eax # imm = 0x7FFC00 ; X64-NEXT: shrl $10, %eax +; X64-NEXT: andl $8191, %eax # imm = 0x1FFF ; X64-NEXT: retq %t0 = and i32 %a0, 8388352 %t1 = ashr i32 %t0, 10 @@ -1338,7 +1339,7 @@ ; X86-LABEL: test_i32_4294836224_mask_ashr_1: ; X86: # %bb.0: ; X86-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 4(%esp), %eax ; X86-NEXT: sarl %eax ; X86-NEXT: retl ; @@ -1356,7 +1357,7 @@ ; X86-LABEL: test_i32_4294836224_mask_ashr_16: ; X86: # %bb.0: ; X86-NEXT: movl $-131072, %eax # imm = 0xFFFE0000 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 4(%esp), %eax ; X86-NEXT: sarl $16, %eax ; X86-NEXT: retl ; @@ -1373,7 +1374,7 @@ define i32 @test_i32_4294836224_mask_ashr_17(i32 %a0) { ; X86-LABEL: test_i32_4294836224_mask_ashr_17: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: sarl $17, %eax ; X86-NEXT: retl ; @@ -1389,7 +1390,7 @@ define i32 @test_i32_4294836224_mask_ashr_18(i32 %a0) { ; X86-LABEL: test_i32_4294836224_mask_ashr_18: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: sarl $18, %eax ; X86-NEXT: retl ; @@ -1408,16 +1409,15 @@ define i32 @test_i32_32767_mask_shl_1(i32 %a0) { ; X86-LABEL: test_i32_32767_mask_shl_1: ; X86: # %bb.0: -; X86-NEXT: movl $32767, %eax # imm = 0x7FFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: addl %eax, %eax +; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: retl ; ; X64-LABEL: test_i32_32767_mask_shl_1: ; X64: # %bb.0: -; X64-NEXT: # kill: def $edi killed $edi def $rdi -; X64-NEXT: andl $32767, %edi # imm = 0x7FFF -; X64-NEXT: leal (%rdi,%rdi), %eax +; X64-NEXT: addl %edi, %edi +; X64-NEXT: movzwl %di, %eax ; X64-NEXT: retq %t0 = and i32 %a0, 32767 %t1 = shl i32 %t0, 1 @@ -1427,7 +1427,7 @@ ; X86-LABEL: test_i32_32767_mask_shl_16: ; X86: # %bb.0: ; X86-NEXT: movl $32767, %eax # imm = 0x7FFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 4(%esp), %eax ; X86-NEXT: shll $16, %eax ; X86-NEXT: retl ; @@ 
-1444,7 +1444,7 @@ define i32 @test_i32_32767_mask_shl_17(i32 %a0) { ; X86-LABEL: test_i32_32767_mask_shl_17: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shll $17, %eax ; X86-NEXT: retl ; @@ -1460,7 +1460,7 @@ define i32 @test_i32_32767_mask_shl_18(i32 %a0) { ; X86-LABEL: test_i32_32767_mask_shl_18: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shll $18, %eax ; X86-NEXT: retl ; @@ -1478,7 +1478,7 @@ ; X86-LABEL: test_i32_8388352_mask_shl_7: ; X86: # %bb.0: ; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 4(%esp), %eax ; X86-NEXT: shll $7, %eax ; X86-NEXT: retl ; @@ -1496,7 +1496,7 @@ ; X86-LABEL: test_i32_8388352_mask_shl_8: ; X86: # %bb.0: ; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 4(%esp), %eax ; X86-NEXT: shll $8, %eax ; X86-NEXT: retl ; @@ -1514,7 +1514,7 @@ ; X86-LABEL: test_i32_8388352_mask_shl_9: ; X86: # %bb.0: ; X86-NEXT: movl $8388352, %eax # imm = 0x7FFF00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 4(%esp), %eax ; X86-NEXT: shll $9, %eax ; X86-NEXT: retl ; @@ -1532,7 +1532,7 @@ ; X86-LABEL: test_i32_8388352_mask_shl_10: ; X86: # %bb.0: ; X86-NEXT: movl $4194048, %eax # imm = 0x3FFF00 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 4(%esp), %eax ; X86-NEXT: shll $10, %eax ; X86-NEXT: retl ; @@ -1551,7 +1551,7 @@ ; X86-LABEL: test_i32_4294836224_mask_shl_1: ; X86: # %bb.0: ; X86-NEXT: movl $2147352576, %eax # imm = 0x7FFE0000 -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 4(%esp), %eax ; X86-NEXT: addl %eax, %eax ; X86-NEXT: retl ; @@ -1575,17 +1575,17 @@ define i64 @test_i64_2147483647_mask_lshr_1(i64 %a0) { ; X86-LABEL: test_i64_2147483647_mask_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl %eax +; X86-NEXT: andl $1073741823, %eax # imm = 0x3FFFFFFF ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl ; ; X64-LABEL: test_i64_2147483647_mask_lshr_1: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: andl $2147483646, %eax # imm = 0x7FFFFFFE -; X64-NEXT: shrq %rax +; X64-NEXT: shrl %eax +; X64-NEXT: andl $1073741823, %eax # imm = 0x3FFFFFFF ; X64-NEXT: retq %t0 = and i64 %a0, 2147483647 %t1 = lshr i64 %t0, 1 @@ -1595,18 +1595,18 @@ define i64 @test_i64_140737488289792_mask_lshr_15(i64 %a0) { ; X86-LABEL: test_i64_140737488289792_mask_lshr_15: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzwl 6(%esp), %ecx ; X86-NEXT: shll $16, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 8(%esp), %eax ; X86-NEXT: shldl $17, %ecx, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl ; ; X64-LABEL: test_i64_140737488289792_mask_lshr_15: ; X64: # %bb.0: -; X64-NEXT: movabsq $140737488289792, %rax # imm = 0x7FFFFFFF0000 -; X64-NEXT: andq %rdi, %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: shrq $15, %rax +; X64-NEXT: andl $-2, %eax ; X64-NEXT: retq %t0 = and i64 %a0, 140737488289792 %t1 = lshr i64 %t0, 15 @@ -1615,9 +1615,9 @@ define i64 @test_i64_140737488289792_mask_lshr_16(i64 %a0) { ; X86-LABEL: test_i64_140737488289792_mask_lshr_16: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl 4(%esp), %ecx ; X86-NEXT: movl $32767, %eax # imm = 0x7FFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 8(%esp), %eax ; X86-NEXT: shldl $16, %ecx, 
%eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl @@ -1635,9 +1635,9 @@ define i64 @test_i64_140737488289792_mask_lshr_17(i64 %a0) { ; X86-LABEL: test_i64_140737488289792_mask_lshr_17: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl 4(%esp), %ecx ; X86-NEXT: movl $32767, %eax # imm = 0x7FFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 8(%esp), %eax ; X86-NEXT: shldl $15, %ecx, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl @@ -1655,9 +1655,9 @@ define i64 @test_i64_140737488289792_mask_lshr_18(i64 %a0) { ; X86-LABEL: test_i64_140737488289792_mask_lshr_18: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl 4(%esp), %ecx ; X86-NEXT: movl $32767, %eax # imm = 0x7FFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 8(%esp), %eax ; X86-NEXT: shldl $14, %ecx, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl @@ -1676,7 +1676,7 @@ define i64 @test_i64_18446744065119617024_mask_lshr_1(i64 %a0) { ; X86-LABEL: test_i64_18446744065119617024_mask_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl 8(%esp), %edx ; X86-NEXT: shrl %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl @@ -1694,16 +1694,16 @@ define i64 @test_i64_18446744065119617024_mask_lshr_32(i64 %a0) { ; X86-LABEL: test_i64_18446744065119617024_mask_lshr_32: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 8(%esp), %eax ; X86-NEXT: andl $-2, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl ; ; X64-LABEL: test_i64_18446744065119617024_mask_lshr_32: ; X64: # %bb.0: -; X64-NEXT: movabsq $-8589934592, %rax # imm = 0xFFFFFFFE00000000 -; X64-NEXT: andq %rdi, %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: shrq $32, %rax +; X64-NEXT: andl $-2, %eax ; X64-NEXT: retq %t0 = and i64 %a0, 18446744065119617024 %t1 = lshr i64 %t0, 32 @@ -1712,7 +1712,7 @@ define i64 @test_i64_18446744065119617024_mask_lshr_33(i64 %a0) { ; X86-LABEL: test_i64_18446744065119617024_mask_lshr_33: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 8(%esp), %eax ; X86-NEXT: shrl %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl @@ -1729,7 +1729,7 @@ define i64 @test_i64_18446744065119617024_mask_lshr_34(i64 %a0) { ; X86-LABEL: test_i64_18446744065119617024_mask_lshr_34: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 8(%esp), %eax ; X86-NEXT: shrl $2, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl @@ -1749,17 +1749,17 @@ define i64 @test_i64_2147483647_mask_ashr_1(i64 %a0) { ; X86-LABEL: test_i64_2147483647_mask_ashr_1: ; X86: # %bb.0: -; X86-NEXT: movl $2147483646, %eax # imm = 0x7FFFFFFE -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl %eax +; X86-NEXT: andl $1073741823, %eax # imm = 0x3FFFFFFF ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl ; ; X64-LABEL: test_i64_2147483647_mask_ashr_1: ; X64: # %bb.0: ; X64-NEXT: movq %rdi, %rax -; X64-NEXT: andl $2147483646, %eax # imm = 0x7FFFFFFE -; X64-NEXT: shrq %rax +; X64-NEXT: shrl %eax +; X64-NEXT: andl $1073741823, %eax # imm = 0x3FFFFFFF ; X64-NEXT: retq %t0 = and i64 %a0, 2147483647 %t1 = ashr i64 %t0, 1 @@ -1769,18 +1769,18 @@ define i64 @test_i64_140737488289792_mask_ashr_15(i64 %a0) { ; X86-LABEL: test_i64_140737488289792_mask_ashr_15: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzwl 6(%esp), %ecx ; X86-NEXT: shll $16, %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 8(%esp), %eax ; X86-NEXT: shldl $17, %ecx, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: 
retl ; ; X64-LABEL: test_i64_140737488289792_mask_ashr_15: ; X64: # %bb.0: -; X64-NEXT: movabsq $140737488289792, %rax # imm = 0x7FFFFFFF0000 -; X64-NEXT: andq %rdi, %rax +; X64-NEXT: movq %rdi, %rax ; X64-NEXT: shrq $15, %rax +; X64-NEXT: andl $-2, %eax ; X64-NEXT: retq %t0 = and i64 %a0, 140737488289792 %t1 = ashr i64 %t0, 15 @@ -1789,9 +1789,9 @@ define i64 @test_i64_140737488289792_mask_ashr_16(i64 %a0) { ; X86-LABEL: test_i64_140737488289792_mask_ashr_16: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl 4(%esp), %ecx ; X86-NEXT: movl $32767, %eax # imm = 0x7FFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 8(%esp), %eax ; X86-NEXT: shldl $16, %ecx, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl @@ -1809,9 +1809,9 @@ define i64 @test_i64_140737488289792_mask_ashr_17(i64 %a0) { ; X86-LABEL: test_i64_140737488289792_mask_ashr_17: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl 4(%esp), %ecx ; X86-NEXT: movl $32767, %eax # imm = 0x7FFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 8(%esp), %eax ; X86-NEXT: shldl $15, %ecx, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl @@ -1829,9 +1829,9 @@ define i64 @test_i64_140737488289792_mask_ashr_18(i64 %a0) { ; X86-LABEL: test_i64_140737488289792_mask_ashr_18: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl 4(%esp), %ecx ; X86-NEXT: movl $32767, %eax # imm = 0x7FFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl 8(%esp), %eax ; X86-NEXT: shldl $14, %ecx, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl @@ -1850,7 +1850,7 @@ define i64 @test_i64_18446744065119617024_mask_ashr_1(i64 %a0) { ; X86-LABEL: test_i64_18446744065119617024_mask_ashr_1: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl 8(%esp), %edx ; X86-NEXT: sarl %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl @@ -1868,7 +1868,7 @@ define i64 @test_i64_18446744065119617024_mask_ashr_32(i64 %a0) { ; X86-LABEL: test_i64_18446744065119617024_mask_ashr_32: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl 8(%esp), %edx ; X86-NEXT: movl %edx, %eax ; X86-NEXT: andl $-2, %eax ; X86-NEXT: sarl $31, %edx @@ -1887,7 +1887,7 @@ define i64 @test_i64_18446744065119617024_mask_ashr_33(i64 %a0) { ; X86-LABEL: test_i64_18446744065119617024_mask_ashr_33: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl 8(%esp), %edx ; X86-NEXT: movl %edx, %eax ; X86-NEXT: sarl %eax ; X86-NEXT: sarl $31, %edx @@ -1905,7 +1905,7 @@ define i64 @test_i64_18446744065119617024_mask_ashr_34(i64 %a0) { ; X86-LABEL: test_i64_18446744065119617024_mask_ashr_34: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl 8(%esp), %edx ; X86-NEXT: movl %edx, %eax ; X86-NEXT: sarl $2, %eax ; X86-NEXT: sarl $31, %edx @@ -1926,15 +1926,14 @@ define i64 @test_i64_2147483647_mask_shl_1(i64 %a0) { ; X86-LABEL: test_i64_2147483647_mask_shl_1: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: addl %eax, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl ; ; X64-LABEL: test_i64_2147483647_mask_shl_1: ; X64: # %bb.0: -; X64-NEXT: andl $2147483647, %edi # imm = 0x7FFFFFFF -; X64-NEXT: leaq (%rdi,%rdi), %rax +; X64-NEXT: leal (%rdi,%rdi), %eax ; X64-NEXT: retq %t0 = and i64 %a0, 2147483647 %t1 = shl i64 %t0, 1 @@ -1944,7 +1943,7 @@ ; X86-LABEL: test_i64_2147483647_mask_shl_32: ; X86: # %bb.0: ; X86-NEXT: movl $2147483647, %edx # imm = 0x7FFFFFFF -; X86-NEXT: andl {{[0-9]+}}(%esp), 
%edx +; X86-NEXT: andl 4(%esp), %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl ; @@ -1961,7 +1960,7 @@ define i64 @test_i64_2147483647_mask_shl_33(i64 %a0) { ; X86-LABEL: test_i64_2147483647_mask_shl_33: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl 4(%esp), %edx ; X86-NEXT: addl %edx, %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl @@ -1978,7 +1977,7 @@ define i64 @test_i64_2147483647_mask_shl_34(i64 %a0) { ; X86-LABEL: test_i64_2147483647_mask_shl_34: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl 4(%esp), %edx ; X86-NEXT: shll $2, %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl @@ -1996,9 +1995,9 @@ define i64 @test_i64_140737488289792_mask_shl_15(i64 %a0) { ; X86-LABEL: test_i64_140737488289792_mask_shl_15: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: movl $32767, %edx # imm = 0x7FFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NEXT: andl 8(%esp), %edx ; X86-NEXT: shldl $15, %eax, %edx ; X86-NEXT: andl $65536, %eax # imm = 0x10000 ; X86-NEXT: shll $15, %eax @@ -2017,9 +2016,9 @@ define i64 @test_i64_140737488289792_mask_shl_16(i64 %a0) { ; X86-LABEL: test_i64_140737488289792_mask_shl_16: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: movl $32767, %edx # imm = 0x7FFF -; X86-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NEXT: andl 8(%esp), %edx ; X86-NEXT: shldl $16, %eax, %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl @@ -2037,9 +2036,9 @@ define i64 @test_i64_140737488289792_mask_shl_17(i64 %a0) { ; X86-LABEL: test_i64_140737488289792_mask_shl_17: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl 6(%esp), %eax ; X86-NEXT: shll $16, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl 8(%esp), %edx ; X86-NEXT: shldl $17, %eax, %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl @@ -2057,9 +2056,9 @@ define i64 @test_i64_140737488289792_mask_shl_18(i64 %a0) { ; X86-LABEL: test_i64_140737488289792_mask_shl_18: ; X86: # %bb.0: -; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl 6(%esp), %eax ; X86-NEXT: shll $16, %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NEXT: movl 8(%esp), %edx ; X86-NEXT: shldl $18, %eax, %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl @@ -2079,7 +2078,7 @@ ; X86-LABEL: test_i64_18446744065119617024_mask_shl_1: ; X86: # %bb.0: ; X86-NEXT: movl $2147483646, %edx # imm = 0x7FFFFFFE -; X86-NEXT: andl {{[0-9]+}}(%esp), %edx +; X86-NEXT: andl 8(%esp), %edx ; X86-NEXT: addl %edx, %edx ; X86-NEXT: xorl %eax, %eax ; X86-NEXT: retl diff --git a/llvm/test/CodeGen/X86/fold-and-shift.ll b/llvm/test/CodeGen/X86/fold-and-shift.ll --- a/llvm/test/CodeGen/X86/fold-and-shift.ll +++ b/llvm/test/CodeGen/X86/fold-and-shift.ll @@ -4,10 +4,9 @@ define i32 @t1(ptr %X, i32 %i) { ; CHECK-LABEL: t1: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax +; CHECK-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movzbl %cl, %ecx -; CHECK-NEXT: movl (%eax,%ecx,4), %eax +; CHECK-NEXT: movl (%ecx,%eax,4), %eax ; CHECK-NEXT: retl entry: @@ -22,9 +21,9 @@ ; CHECK-LABEL: t2: ; CHECK: # %bb.0: # %entry ; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax -; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx -; CHECK-NEXT: movzwl %cx, %ecx -; CHECK-NEXT: movl (%eax,%ecx,4), %eax +; CHECK-NEXT: movzwl {{[0-9]+}}(%esp), %ecx +; CHECK-NEXT: addl %ecx, %ecx +; CHECK-NEXT: movl (%eax,%ecx,2), %eax ; CHECK-NEXT: retl entry: diff --git 
a/llvm/test/CodeGen/X86/limited-prec.ll b/llvm/test/CodeGen/X86/limited-prec.ll --- a/llvm/test/CodeGen/X86/limited-prec.ll +++ b/llvm/test/CodeGen/X86/limited-prec.ll @@ -318,8 +318,8 @@ ; precision6-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF ; precision6-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 ; precision6-NEXT: movl %ecx, (%esp) -; precision6-NEXT: andl $2139095040, %eax # imm = 0x7F800000 ; precision6-NEXT: shrl $23, %eax +; precision6-NEXT: movzbl %al, %eax ; precision6-NEXT: addl $-127, %eax ; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp) ; precision6-NEXT: flds (%esp) @@ -342,8 +342,8 @@ ; precision12-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF ; precision12-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 ; precision12-NEXT: movl %ecx, (%esp) -; precision12-NEXT: andl $2139095040, %eax # imm = 0x7F800000 ; precision12-NEXT: shrl $23, %eax +; precision12-NEXT: movzbl %al, %eax ; precision12-NEXT: addl $-127, %eax ; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp) ; precision12-NEXT: flds (%esp) @@ -370,8 +370,8 @@ ; precision18-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF ; precision18-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 ; precision18-NEXT: movl %ecx, (%esp) -; precision18-NEXT: andl $2139095040, %eax # imm = 0x7F800000 ; precision18-NEXT: shrl $23, %eax +; precision18-NEXT: movzbl %al, %eax ; precision18-NEXT: addl $-127, %eax ; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp) ; precision18-NEXT: flds (%esp) @@ -410,8 +410,8 @@ ; precision6-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF ; precision6-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 ; precision6-NEXT: movl %ecx, (%esp) -; precision6-NEXT: andl $2139095040, %eax # imm = 0x7F800000 ; precision6-NEXT: shrl $23, %eax +; precision6-NEXT: movzbl %al, %eax ; precision6-NEXT: addl $-127, %eax ; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp) ; precision6-NEXT: flds (%esp) @@ -432,8 +432,8 @@ ; precision12-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF ; precision12-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 ; precision12-NEXT: movl %ecx, (%esp) -; precision12-NEXT: andl $2139095040, %eax # imm = 0x7F800000 ; precision12-NEXT: shrl $23, %eax +; precision12-NEXT: movzbl %al, %eax ; precision12-NEXT: addl $-127, %eax ; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp) ; precision12-NEXT: flds (%esp) @@ -458,8 +458,8 @@ ; precision18-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF ; precision18-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 ; precision18-NEXT: movl %ecx, (%esp) -; precision18-NEXT: andl $2139095040, %eax # imm = 0x7F800000 ; precision18-NEXT: shrl $23, %eax +; precision18-NEXT: movzbl %al, %eax ; precision18-NEXT: addl $-127, %eax ; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp) ; precision18-NEXT: flds (%esp) @@ -496,8 +496,8 @@ ; precision6-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF ; precision6-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 ; precision6-NEXT: movl %ecx, (%esp) -; precision6-NEXT: andl $2139095040, %eax # imm = 0x7F800000 ; precision6-NEXT: shrl $23, %eax +; precision6-NEXT: movzbl %al, %eax ; precision6-NEXT: addl $-127, %eax ; precision6-NEXT: movl %eax, {{[0-9]+}}(%esp) ; precision6-NEXT: flds (%esp) @@ -520,8 +520,8 @@ ; precision12-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF ; precision12-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 ; precision12-NEXT: movl %ecx, (%esp) -; precision12-NEXT: andl $2139095040, %eax # imm = 0x7F800000 ; precision12-NEXT: shrl $23, %eax +; precision12-NEXT: movzbl %al, %eax ; precision12-NEXT: addl $-127, %eax ; precision12-NEXT: movl %eax, {{[0-9]+}}(%esp) ; 
precision12-NEXT: flds (%esp) @@ -546,8 +546,8 @@ ; precision18-NEXT: andl $8388607, %ecx # imm = 0x7FFFFF ; precision18-NEXT: orl $1065353216, %ecx # imm = 0x3F800000 ; precision18-NEXT: movl %ecx, (%esp) -; precision18-NEXT: andl $2139095040, %eax # imm = 0x7F800000 ; precision18-NEXT: shrl $23, %eax +; precision18-NEXT: movzbl %al, %eax ; precision18-NEXT: addl $-127, %eax ; precision18-NEXT: movl %eax, {{[0-9]+}}(%esp) ; precision18-NEXT: flds (%esp) diff --git a/llvm/test/CodeGen/X86/movmsk-cmp.ll b/llvm/test/CodeGen/X86/movmsk-cmp.ll --- a/llvm/test/CodeGen/X86/movmsk-cmp.ll +++ b/llvm/test/CodeGen/X86/movmsk-cmp.ll @@ -3853,8 +3853,6 @@ ; SSE-NEXT: shrl $15, %ecx ; SSE-NEXT: movl %eax, %edx ; SSE-NEXT: shrl $8, %edx -; SSE-NEXT: andl $1, %edx -; SSE-NEXT: andl $8, %eax ; SSE-NEXT: shrl $3, %eax ; SSE-NEXT: xorl %edx, %eax ; SSE-NEXT: andl %ecx, %eax @@ -3869,8 +3867,6 @@ ; AVX1OR2-NEXT: shrl $15, %ecx ; AVX1OR2-NEXT: movl %eax, %edx ; AVX1OR2-NEXT: shrl $8, %edx -; AVX1OR2-NEXT: andl $1, %edx -; AVX1OR2-NEXT: andl $8, %eax ; AVX1OR2-NEXT: shrl $3, %eax ; AVX1OR2-NEXT: xorl %edx, %eax ; AVX1OR2-NEXT: andl %ecx, %eax @@ -3975,8 +3971,8 @@ ; SSE-NEXT: movmskps %xmm1, %eax ; SSE-NEXT: movl %eax, %ecx ; SSE-NEXT: shrb $3, %cl -; SSE-NEXT: andb $4, %al ; SSE-NEXT: shrb $2, %al +; SSE-NEXT: andb $1, %al ; SSE-NEXT: xorb %cl, %al ; SSE-NEXT: # kill: def $al killed $al killed $eax ; SSE-NEXT: retq @@ -3987,8 +3983,8 @@ ; AVX1OR2-NEXT: vmovmskps %xmm0, %eax ; AVX1OR2-NEXT: movl %eax, %ecx ; AVX1OR2-NEXT: shrb $3, %cl -; AVX1OR2-NEXT: andb $4, %al ; AVX1OR2-NEXT: shrb $2, %al +; AVX1OR2-NEXT: andb $1, %al ; AVX1OR2-NEXT: xorb %cl, %al ; AVX1OR2-NEXT: # kill: def $al killed $al killed $eax ; AVX1OR2-NEXT: retq diff --git a/llvm/test/CodeGen/X86/pr15267.ll b/llvm/test/CodeGen/X86/pr15267.ll --- a/llvm/test/CodeGen/X86/pr15267.ll +++ b/llvm/test/CodeGen/X86/pr15267.ll @@ -85,19 +85,17 @@ ; CHECK: # %bb.0: ; CHECK-NEXT: movq (%rdi), %rax ; CHECK-NEXT: movl %eax, %ecx -; CHECK-NEXT: shrl $4, %ecx ; CHECK-NEXT: andl $15, %ecx -; CHECK-NEXT: movl %eax, %edx -; CHECK-NEXT: andl $15, %edx -; CHECK-NEXT: vmovd %edx, %xmm0 +; CHECK-NEXT: vmovd %ecx, %xmm0 +; CHECK-NEXT: movzbl %al, %ecx +; CHECK-NEXT: shrl $4, %ecx ; CHECK-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: shrl $8, %ecx ; CHECK-NEXT: andl $15, %ecx ; CHECK-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 -; CHECK-NEXT: movl %eax, %ecx +; CHECK-NEXT: movzwl %ax, %ecx ; CHECK-NEXT: shrl $12, %ecx -; CHECK-NEXT: andl $15, %ecx ; CHECK-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 ; CHECK-NEXT: movl %eax, %ecx ; CHECK-NEXT: shrl $16, %ecx diff --git a/llvm/test/CodeGen/X86/pr45995.ll b/llvm/test/CodeGen/X86/pr45995.ll --- a/llvm/test/CodeGen/X86/pr45995.ll +++ b/llvm/test/CodeGen/X86/pr45995.ll @@ -18,11 +18,11 @@ ; CHECK-NEXT: mov ebx, edi ; CHECK-NEXT: shr bl, 3 ; CHECK-NEXT: mov ebp, edi -; CHECK-NEXT: and bpl, 4 ; CHECK-NEXT: shr bpl, 2 +; CHECK-NEXT: and bpl, 1 ; CHECK-NEXT: mov r14d, edi -; CHECK-NEXT: and r14b, 2 ; CHECK-NEXT: shr r14b +; CHECK-NEXT: and r14b, 1 ; CHECK-NEXT: call print_i1@PLT ; CHECK-NEXT: movzx edi, r14b ; CHECK-NEXT: call print_i1@PLT @@ -78,21 +78,21 @@ ; CHECK-NEXT: shr al, 3 ; CHECK-NEXT: mov byte ptr [rsp + 7], al # 1-byte Spill ; CHECK-NEXT: mov r14d, ebx -; CHECK-NEXT: and r14b, 4 ; CHECK-NEXT: shr r14b, 2 +; CHECK-NEXT: and r14b, 1 ; CHECK-NEXT: mov r15d, ebx -; CHECK-NEXT: and r15b, 2 ; CHECK-NEXT: shr r15b +; CHECK-NEXT: and r15b, 1 ; CHECK-NEXT: vpslld xmm0, xmm0, 31 ; CHECK-NEXT: vmovmskps edi, 
xmm0 ; CHECK-NEXT: mov r12d, edi ; CHECK-NEXT: shr r12b, 3 ; CHECK-NEXT: mov r13d, edi -; CHECK-NEXT: and r13b, 4 ; CHECK-NEXT: shr r13b, 2 +; CHECK-NEXT: and r13b, 1 ; CHECK-NEXT: mov ebp, edi -; CHECK-NEXT: and bpl, 2 ; CHECK-NEXT: shr bpl +; CHECK-NEXT: and bpl, 1 ; CHECK-NEXT: call print_i1@PLT ; CHECK-NEXT: movzx edi, bpl ; CHECK-NEXT: call print_i1@PLT diff --git a/llvm/test/CodeGen/X86/pull-binop-through-shift.ll b/llvm/test/CodeGen/X86/pull-binop-through-shift.ll --- a/llvm/test/CodeGen/X86/pull-binop-through-shift.ll +++ b/llvm/test/CodeGen/X86/pull-binop-through-shift.ll @@ -217,9 +217,9 @@ ; X86-LABEL: and_nosignbit_lshr: ; X86: # %bb.0: ; X86-NEXT: movl 8(%esp), %ecx -; X86-NEXT: movl $2147418112, %eax # imm = 0x7FFF0000 -; X86-NEXT: andl 4(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $8, %eax +; X86-NEXT: andl $8388352, %eax # imm = 0x7FFF00 ; X86-NEXT: movl %eax, (%ecx) ; X86-NEXT: retl %t0 = and i32 %x, 2147418112 ; 0x7FFF0000 @@ -398,9 +398,9 @@ ; X86-LABEL: and_nosignbit_ashr: ; X86: # %bb.0: ; X86-NEXT: movl 8(%esp), %ecx -; X86-NEXT: movl $2147418112, %eax # imm = 0x7FFF0000 -; X86-NEXT: andl 4(%esp), %eax +; X86-NEXT: movl 4(%esp), %eax ; X86-NEXT: shrl $8, %eax +; X86-NEXT: andl $8388352, %eax # imm = 0x7FFF00 ; X86-NEXT: movl %eax, (%ecx) ; X86-NEXT: retl %t0 = and i32 %x, 2147418112 ; 0x7FFF0000 diff --git a/llvm/test/CodeGen/X86/rev16.ll b/llvm/test/CodeGen/X86/rev16.ll --- a/llvm/test/CodeGen/X86/rev16.ll +++ b/llvm/test/CodeGen/X86/rev16.ll @@ -29,22 +29,22 @@ define i32 @not_rev16(i32 %a) { ; X86-LABEL: not_rev16: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl %ecx, %eax -; X86-NEXT: shll $8, %eax +; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shrl $8, %ecx ; X86-NEXT: andl $65280, %ecx # imm = 0xFF00 -; X86-NEXT: andl $16711680, %eax # imm = 0xFF0000 +; X86-NEXT: andl $65280, %eax # imm = 0xFF00 +; X86-NEXT: shll $8, %eax ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: retl ; ; X64-LABEL: not_rev16: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: shll $8, %eax -; X64-NEXT: shrl $8, %edi +; X64-NEXT: shrl $8, %eax +; X64-NEXT: andl $65280, %eax # imm = 0xFF00 ; X64-NEXT: andl $65280, %edi # imm = 0xFF00 -; X64-NEXT: andl $16711680, %eax # imm = 0xFF0000 +; X64-NEXT: shll $8, %edi ; X64-NEXT: orl %edi, %eax ; X64-NEXT: retq %l8 = shl i32 %a, 8 @@ -117,11 +117,11 @@ define i32 @different_shift_amount(i32 %a) { ; X86-LABEL: different_shift_amount: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl %eax, %ecx -; X86-NEXT: shll $9, %ecx +; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movl %ecx, %eax ; X86-NEXT: shrl $8, %eax -; X86-NEXT: andl $-16712192, %ecx # imm = 0xFF00FE00 +; X86-NEXT: andl $8355967, %ecx # imm = 0x7F807F +; X86-NEXT: shll $9, %ecx ; X86-NEXT: andl $16711935, %eax # imm = 0xFF00FF ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: retl @@ -129,10 +129,10 @@ ; X64-LABEL: different_shift_amount: ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax -; X64-NEXT: shll $9, %eax -; X64-NEXT: shrl $8, %edi -; X64-NEXT: andl $-16712192, %eax # imm = 0xFF00FE00 -; X64-NEXT: andl $16711935, %edi # imm = 0xFF00FF +; X64-NEXT: shrl $8, %eax +; X64-NEXT: andl $8355967, %edi # imm = 0x7F807F +; X64-NEXT: shll $9, %edi +; X64-NEXT: andl $16711935, %eax # imm = 0xFF00FF ; X64-NEXT: orl %edi, %eax ; X64-NEXT: retq %l8 = shl i32 %a, 9 diff --git a/llvm/test/CodeGen/X86/rotate-extract.ll b/llvm/test/CodeGen/X86/rotate-extract.ll --- a/llvm/test/CodeGen/X86/rotate-extract.ll +++ 
b/llvm/test/CodeGen/X86/rotate-extract.ll @@ -166,7 +166,7 @@ ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shrl $9, %ecx -; X86-NEXT: andl $-8, %eax +; X86-NEXT: andl $120, %eax ; X86-NEXT: shll $25, %eax ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: retl @@ -175,7 +175,7 @@ ; X64: # %bb.0: ; X64-NEXT: movl %edi, %eax ; X64-NEXT: shrl $9, %eax -; X64-NEXT: andl $-8, %edi +; X64-NEXT: andl $120, %edi ; X64-NEXT: shll $25, %edi ; X64-NEXT: orl %edi, %eax ; X64-NEXT: retq diff --git a/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll b/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll --- a/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll +++ b/llvm/test/CodeGen/X86/selectcc-to-shiftand.ll @@ -169,9 +169,8 @@ define i16 @sel_shift_bool_i16(i1 %t) { ; ANY-LABEL: sel_shift_bool_i16: ; ANY: # %bb.0: -; ANY-NEXT: movl %edi, %eax -; ANY-NEXT: andl $1, %eax -; ANY-NEXT: shll $7, %eax +; ANY-NEXT: shll $7, %edi +; ANY-NEXT: movzbl %dil, %eax ; ANY-NEXT: # kill: def $ax killed $ax killed $eax ; ANY-NEXT: retq %shl = select i1 %t, i16 128, i16 0 diff --git a/llvm/test/CodeGen/X86/setcc.ll b/llvm/test/CodeGen/X86/setcc.ll --- a/llvm/test/CodeGen/X86/setcc.ll +++ b/llvm/test/CodeGen/X86/setcc.ll @@ -281,9 +281,8 @@ ; X86-LABEL: shift_and: ; X86: ## %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax -; X86-NEXT: andb $4, %al -; X86-NEXT: shrb $2, %al -; X86-NEXT: movzbl %al, %eax +; X86-NEXT: shrl $2, %eax +; X86-NEXT: andl $1, %eax ; X86-NEXT: ## kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/shift-amount-mod.ll b/llvm/test/CodeGen/X86/shift-amount-mod.ll --- a/llvm/test/CodeGen/X86/shift-amount-mod.ll +++ b/llvm/test/CodeGen/X86/shift-amount-mod.ll @@ -1556,9 +1556,8 @@ define i16 @sh_trunc_sh(i64 %x) { ; X32-LABEL: sh_trunc_sh: ; X32: # %bb.0: -; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax ; X32-NEXT: shrl $4, %eax -; X32-NEXT: andl $15, %eax ; X32-NEXT: # kill: def $ax killed $ax killed $eax ; X32-NEXT: retl ; diff --git a/llvm/test/CodeGen/X86/shift-mask.ll b/llvm/test/CodeGen/X86/shift-mask.ll --- a/llvm/test/CodeGen/X86/shift-mask.ll +++ b/llvm/test/CodeGen/X86/shift-mask.ll @@ -113,17 +113,17 @@ define i16 @test_i16_shl_lshr_1(i16 %a0) { ; X86-LABEL: test_i16_shl_lshr_1: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $16376, %eax # imm = 0x3FF8 ; X86-NEXT: shll $2, %eax -; X86-NEXT: andl $65504, %eax # imm = 0xFFE0 ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-MASK-LABEL: test_i16_shl_lshr_1: ; X64-MASK: # %bb.0: ; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi +; X64-MASK-NEXT: andl $16376, %edi # imm = 0x3FF8 ; X64-MASK-NEXT: leal (,%rdi,4), %eax -; X64-MASK-NEXT: andl $65504, %eax # imm = 0xFFE0 ; X64-MASK-NEXT: # kill: def $ax killed $ax killed $eax ; X64-MASK-NEXT: retq ; @@ -365,15 +365,15 @@ ; X86-LABEL: test_i8_lshr_lshr_2: ; X86: # %bb.0: ; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andb $7, %al ; X86-NEXT: shlb $2, %al -; X86-NEXT: andb $28, %al ; X86-NEXT: retl ; ; X64-MASK-LABEL: test_i8_lshr_lshr_2: ; X64-MASK: # %bb.0: ; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi +; X64-MASK-NEXT: andb $7, %dil ; X64-MASK-NEXT: leal (,%rdi,4), %eax -; X64-MASK-NEXT: andb $28, %al ; X64-MASK-NEXT: # kill: def $al killed $al killed $eax ; X64-MASK-NEXT: retq ; @@ -453,17 +453,17 @@ define i16 @test_i16_lshr_lshr_2(i16 %a0) { ; X86-LABEL: test_i16_lshr_lshr_2: ; X86: # %bb.0: -; 
X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-NEXT: andl $2047, %eax # imm = 0x7FF ; X86-NEXT: shll $2, %eax -; X86-NEXT: andl $8188, %eax # imm = 0x1FFC ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: retl ; ; X64-LABEL: test_i16_lshr_lshr_2: ; X64: # %bb.0: ; X64-NEXT: # kill: def $edi killed $edi def $rdi +; X64-NEXT: andl $2047, %edi # imm = 0x7FF ; X64-NEXT: leal (,%rdi,4), %eax -; X64-NEXT: andl $8188, %eax # imm = 0x1FFC ; X64-NEXT: # kill: def $ax killed $ax killed $eax ; X64-NEXT: retq %1 = shl i16 %a0, 5 @@ -517,16 +517,16 @@ define i32 @test_i32_lshr_lshr_2(i32 %a0) { ; X86-LABEL: test_i32_lshr_lshr_2: ; X86: # %bb.0: -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-NEXT: movl $134217727, %eax # imm = 0x7FFFFFF +; X86-NEXT: andl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shll $2, %eax -; X86-NEXT: andl $536870908, %eax # imm = 0x1FFFFFFC ; X86-NEXT: retl ; ; X64-MASK-LABEL: test_i32_lshr_lshr_2: ; X64-MASK: # %bb.0: ; X64-MASK-NEXT: # kill: def $edi killed $edi def $rdi +; X64-MASK-NEXT: andl $134217727, %edi # imm = 0x7FFFFFF ; X64-MASK-NEXT: leal (,%rdi,4), %eax -; X64-MASK-NEXT: andl $536870908, %eax # imm = 0x1FFFFFFC ; X64-MASK-NEXT: retq ; ; X64-SHIFT-LABEL: test_i32_lshr_lshr_2: @@ -620,9 +620,9 @@ ; ; X64-MASK-LABEL: test_i64_lshr_lshr_2: ; X64-MASK: # %bb.0: -; X64-MASK-NEXT: leaq (,%rdi,4), %rcx -; X64-MASK-NEXT: movabsq $2305843009213693948, %rax # imm = 0x1FFFFFFFFFFFFFFC -; X64-MASK-NEXT: andq %rcx, %rax +; X64-MASK-NEXT: movabsq $576460752303423487, %rax # imm = 0x7FFFFFFFFFFFFFF +; X64-MASK-NEXT: andq %rdi, %rax +; X64-MASK-NEXT: shlq $2, %rax ; X64-MASK-NEXT: retq ; ; X64-SHIFT-LABEL: test_i64_lshr_lshr_2: diff --git a/llvm/test/CodeGen/X86/sttni.ll b/llvm/test/CodeGen/X86/sttni.ll --- a/llvm/test/CodeGen/X86/sttni.ll +++ b/llvm/test/CodeGen/X86/sttni.ll @@ -315,11 +315,10 @@ ; X86-NEXT: jmp .LBB8_3 ; X86-NEXT: .LBB8_2: # %compare ; X86-NEXT: movdqa %xmm0, (%esp) -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: andl $14, %ecx -; X86-NEXT: movzwl (%esp,%ecx), %eax +; X86-NEXT: andl $7, %ecx +; X86-NEXT: movzwl (%esp,%ecx,2), %eax ; X86-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp) -; X86-NEXT: subw 16(%esp,%ecx), %ax +; X86-NEXT: subw 16(%esp,%ecx,2), %ax ; X86-NEXT: .LBB8_3: # %exit ; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: movl %ebp, %esp @@ -452,11 +451,10 @@ ; X86-NEXT: jmp .LBB11_3 ; X86-NEXT: .LBB11_2: # %compare ; X86-NEXT: movdqa %xmm1, (%esp) -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: andl $14, %ecx -; X86-NEXT: movzwl (%esp,%ecx), %eax +; X86-NEXT: andl $7, %ecx +; X86-NEXT: movzwl (%esp,%ecx,2), %eax ; X86-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) -; X86-NEXT: subw 16(%esp,%ecx), %ax +; X86-NEXT: subw 16(%esp,%ecx,2), %ax ; X86-NEXT: .LBB11_3: # %exit ; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: leal -4(%ebp), %esp @@ -772,11 +770,10 @@ ; X86-NEXT: andl $-16, %esp ; X86-NEXT: subl $48, %esp ; X86-NEXT: movdqa %xmm0, (%esp) -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: andl $14, %ecx -; X86-NEXT: movzwl (%esp,%ecx), %eax +; X86-NEXT: andl $7, %ecx +; X86-NEXT: movzwl (%esp,%ecx,2), %eax ; X86-NEXT: movdqa %xmm1, {{[0-9]+}}(%esp) -; X86-NEXT: subw 16(%esp,%ecx), %ax +; X86-NEXT: subw 16(%esp,%ecx,2), %ax ; X86-NEXT: movl %ebp, %esp ; X86-NEXT: popl %ebp ; X86-NEXT: movzwl %ax, %eax @@ -889,11 +886,10 @@ ; X86-NEXT: jmp .LBB23_3 ; X86-NEXT: .LBB23_2: # %compare ; X86-NEXT: movdqa %xmm1, (%esp) -; X86-NEXT: addl %ecx, %ecx -; X86-NEXT: andl $14, %ecx -; X86-NEXT: movzwl (%esp,%ecx), %eax +; X86-NEXT: andl $7, %ecx +; X86-NEXT: movzwl 
(%esp,%ecx,2), %eax ; X86-NEXT: movdqa %xmm0, {{[0-9]+}}(%esp) -; X86-NEXT: subw 16(%esp,%ecx), %ax +; X86-NEXT: subw 16(%esp,%ecx,2), %ax ; X86-NEXT: .LBB23_3: # %exit ; X86-NEXT: movzwl %ax, %eax ; X86-NEXT: movl %ebp, %esp diff --git a/llvm/test/CodeGen/X86/tbm_patterns.ll b/llvm/test/CodeGen/X86/tbm_patterns.ll --- a/llvm/test/CodeGen/X86/tbm_patterns.ll +++ b/llvm/test/CodeGen/X86/tbm_patterns.ll @@ -4,7 +4,8 @@ define i32 @test_x86_tbm_bextri_u32(i32 %a) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u32: ; CHECK: # %bb.0: -; CHECK-NEXT: bextrl $3076, %edi, %eax # imm = 0xC04 +; CHECK-NEXT: movzwl %di, %eax +; CHECK-NEXT: shrl $4, %eax ; CHECK-NEXT: retq %t0 = lshr i32 %a, 4 %t1 = and i32 %t0, 4095 @@ -26,7 +27,8 @@ define i32 @test_x86_tbm_bextri_u32_m(ptr nocapture %a) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u32_m: ; CHECK: # %bb.0: -; CHECK-NEXT: bextrl $3076, (%rdi), %eax # imm = 0xC04 +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: shrl $4, %eax ; CHECK-NEXT: retq %t0 = load i32, ptr %a %t1 = lshr i32 %t0, 4 @@ -37,7 +39,8 @@ define i32 @test_x86_tbm_bextri_u32_z(i32 %a, i32 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u32_z: ; CHECK: # %bb.0: -; CHECK-NEXT: bextrl $3076, %edi, %eax # imm = 0xC04 +; CHECK-NEXT: movzwl %di, %eax +; CHECK-NEXT: shrl $4, %eax ; CHECK-NEXT: cmovel %esi, %eax ; CHECK-NEXT: retq %t0 = lshr i32 %a, 4 @@ -51,7 +54,7 @@ ; CHECK-LABEL: test_x86_tbm_bextri_u32_z2: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 +; CHECK-NEXT: testl $65520, %edi # imm = 0xFFF0 ; CHECK-NEXT: cmovnel %edx, %eax ; CHECK-NEXT: retq %t0 = lshr i32 %a, 4 @@ -65,7 +68,8 @@ ; CHECK-LABEL: test_x86_tbm_bextri_u32_sle: ; CHECK: # %bb.0: ; CHECK-NEXT: movl %esi, %eax -; CHECK-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 +; CHECK-NEXT: movzwl %di, %ecx +; CHECK-NEXT: shrl $4, %ecx ; CHECK-NEXT: testl %ecx, %ecx ; CHECK-NEXT: cmovgl %edx, %eax ; CHECK-NEXT: retq @@ -79,7 +83,8 @@ define i64 @test_x86_tbm_bextri_u64(i64 %a) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u64: ; CHECK: # %bb.0: -; CHECK-NEXT: bextrl $3076, %edi, %eax # imm = 0xC04 +; CHECK-NEXT: movzwl %di, %eax +; CHECK-NEXT: shrl $4, %eax ; CHECK-NEXT: retq %t0 = lshr i64 %a, 4 %t1 = and i64 %t0, 4095 @@ -101,7 +106,8 @@ define i64 @test_x86_tbm_bextri_u64_m(ptr nocapture %a) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u64_m: ; CHECK: # %bb.0: -; CHECK-NEXT: bextrl $3076, (%rdi), %eax # imm = 0xC04 +; CHECK-NEXT: movzwl (%rdi), %eax +; CHECK-NEXT: shrl $4, %eax ; CHECK-NEXT: retq %t0 = load i64, ptr %a %t1 = lshr i64 %t0, 4 @@ -112,7 +118,8 @@ define i64 @test_x86_tbm_bextri_u64_z(i64 %a, i64 %b) nounwind { ; CHECK-LABEL: test_x86_tbm_bextri_u64_z: ; CHECK: # %bb.0: -; CHECK-NEXT: bextrl $3076, %edi, %eax # imm = 0xC04 +; CHECK-NEXT: movzwl %di, %eax +; CHECK-NEXT: shrl $4, %eax ; CHECK-NEXT: cmoveq %rsi, %rax ; CHECK-NEXT: retq %t0 = lshr i64 %a, 4 @@ -126,7 +133,7 @@ ; CHECK-LABEL: test_x86_tbm_bextri_u64_z2: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 +; CHECK-NEXT: testl $65520, %edi # imm = 0xFFF0 ; CHECK-NEXT: cmovneq %rdx, %rax ; CHECK-NEXT: retq %t0 = lshr i64 %a, 4 @@ -140,7 +147,8 @@ ; CHECK-LABEL: test_x86_tbm_bextri_u64_sle: ; CHECK: # %bb.0: ; CHECK-NEXT: movq %rsi, %rax -; CHECK-NEXT: bextrl $3076, %edi, %ecx # imm = 0xC04 +; CHECK-NEXT: movzwl %di, %ecx +; CHECK-NEXT: shrl $4, %ecx ; CHECK-NEXT: testq %rcx, %rcx ; CHECK-NEXT: cmovgq %rdx, %rax ; CHECK-NEXT: retq diff --git 
a/llvm/test/CodeGen/X86/udiv_fix.ll b/llvm/test/CodeGen/X86/udiv_fix.ll --- a/llvm/test/CodeGen/X86/udiv_fix.ll +++ b/llvm/test/CodeGen/X86/udiv_fix.ll @@ -90,9 +90,8 @@ ; X86-LABEL: func3: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %eax, %eax -; X86-NEXT: movzbl %cl, %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: shll $4, %ecx ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: xorl %edx, %edx diff --git a/llvm/test/CodeGen/X86/udiv_fix_sat.ll b/llvm/test/CodeGen/X86/udiv_fix_sat.ll --- a/llvm/test/CodeGen/X86/udiv_fix_sat.ll +++ b/llvm/test/CodeGen/X86/udiv_fix_sat.ll @@ -27,8 +27,7 @@ ; X86-LABEL: func: ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx -; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movzwl %ax, %eax +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax ; X86-NEXT: shll $8, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: divl %ecx @@ -109,9 +108,8 @@ ; X86-LABEL: func3: ; X86: # %bb.0: ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: addl %eax, %eax -; X86-NEXT: movzbl %cl, %ecx +; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: shll $4, %ecx ; X86-NEXT: # kill: def $ax killed $ax killed $eax ; X86-NEXT: xorl %edx, %edx @@ -284,15 +282,14 @@ ; X86-LABEL: func7: ; X86: # %bb.0: ; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax -; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx ; X86-NEXT: movl %ecx, %edx -; X86-NEXT: shll $17, %edx -; X86-NEXT: shrl $15, %ecx -; X86-NEXT: andl $1, %ecx +; X86-NEXT: shrl $15, %edx +; X86-NEXT: shll $17, %ecx ; X86-NEXT: pushl $0 ; X86-NEXT: pushl %eax -; X86-NEXT: pushl %ecx ; X86-NEXT: pushl %edx +; X86-NEXT: pushl %ecx ; X86-NEXT: calll __udivdi3 ; X86-NEXT: addl $16, %esp ; X86-NEXT: cmpl $131071, %eax # imm = 0x1FFFF diff --git a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll --- a/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll +++ b/llvm/test/CodeGen/X86/urem-seteq-illegal-types.ll @@ -34,8 +34,8 @@ ; X86-NEXT: imull $115043767, {{[0-9]+}}(%esp), %eax # imm = 0x6DB6DB7 ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shll $26, %ecx -; X86-NEXT: andl $134217726, %eax # imm = 0x7FFFFFE ; X86-NEXT: shrl %eax +; X86-NEXT: andl $67108863, %eax # imm = 0x3FFFFFF ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF ; X86-NEXT: cmpl $9586981, %eax # imm = 0x924925 @@ -47,8 +47,8 @@ ; X64-NEXT: imull $115043767, %edi, %eax # imm = 0x6DB6DB7 ; X64-NEXT: movl %eax, %ecx ; X64-NEXT: shll $26, %ecx -; X64-NEXT: andl $134217726, %eax # imm = 0x7FFFFFE ; X64-NEXT: shrl %eax +; X64-NEXT: andl $67108863, %eax # imm = 0x3FFFFFF ; X64-NEXT: orl %ecx, %eax ; X64-NEXT: andl $134217727, %eax # imm = 0x7FFFFFF ; X64-NEXT: cmpl $9586981, %eax # imm = 0x924925 @@ -111,8 +111,8 @@ ; X86-NEXT: imull $683, {{[0-9]+}}(%esp), %eax # imm = 0x2AB ; X86-NEXT: movl %eax, %ecx ; X86-NEXT: shll $10, %ecx -; X86-NEXT: andl $2046, %eax # imm = 0x7FE ; X86-NEXT: shrl %eax +; X86-NEXT: andl $1023, %eax # imm = 0x3FF ; X86-NEXT: orl %ecx, %eax ; X86-NEXT: andl $2047, %eax # imm = 0x7FF ; X86-NEXT: cmpl $342, %eax # imm = 0x156 diff --git a/llvm/test/CodeGen/X86/vselect.ll b/llvm/test/CodeGen/X86/vselect.ll --- a/llvm/test/CodeGen/X86/vselect.ll +++ b/llvm/test/CodeGen/X86/vselect.ll @@ -652,8 +652,8 @@ ; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero ; SSE-NEXT: pcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 ; SSE-NEXT: movq %xmm0, %rax -; 
SSE-NEXT: andl $1, %eax -; SSE-NEXT: shlq $15, %rax +; SSE-NEXT: shll $15, %eax +; SSE-NEXT: movzwl %ax, %eax ; SSE-NEXT: retq ; ; AVX-LABEL: vselect_any_extend_vector_inreg_crash: @@ -661,8 +661,8 @@ ; AVX-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero ; AVX-NEXT: vpcmpeqb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: vmovq %xmm0, %rax -; AVX-NEXT: andl $1, %eax -; AVX-NEXT: shlq $15, %rax +; AVX-NEXT: shll $15, %eax +; AVX-NEXT: movzwl %ax, %eax ; AVX-NEXT: retq 0: %1 = load <8 x i8>, ptr %x diff --git a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll --- a/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll +++ b/llvm/test/CodeGen/X86/zext-logicop-shift-load.ll @@ -7,8 +7,8 @@ ; X86: # %bb.0: # %entry ; X86-NEXT: movl {{[0-9]+}}(%esp), %eax ; X86-NEXT: movzbl (%eax), %eax +; X86-NEXT: andl $15, %eax ; X86-NEXT: shll $2, %eax -; X86-NEXT: andl $60, %eax ; X86-NEXT: xorl %edx, %edx ; X86-NEXT: retl ;