Index: llvm/trunk/include/llvm/CodeGen/TargetLowering.h =================================================================== --- llvm/trunk/include/llvm/CodeGen/TargetLowering.h +++ llvm/trunk/include/llvm/CodeGen/TargetLowering.h @@ -539,6 +539,12 @@ return hasAndNotCompare(X); } + /// Return true if the target has a bit-test instruction: + /// (X & (1 << Y)) ==/!= 0 + /// This knowledge can be used to prevent breaking the pattern, + /// or to create it if it would be recognized. + virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; } + /// There are two ways to clear extreme bits (either low or high): /// Mask: x & (-1 << y) (the instcombine canonical form) /// Shifts: x >> y << y @@ -571,6 +577,38 @@ return false; } + /// Given the pattern + /// (X & (C l>>/<< Y)) ==/!= 0 + /// return true if it should be transformed into: + /// ((X <</>> Y) & C) ==/!= 0 + /// WARNING: if 'X' is a constant, the fold may deadlock! + /// FIXME: we could avoid passing XC, but we can't use isConstOrConstSplat() + /// here because it can end up being not linked in. + virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const { + if (hasBitTest(X, Y)) { + // One interesting pattern that we'd want to form is 'bit test': + // ((1 << Y) & C) ==/!= 0 + // But we also need to be careful not to try to reverse that fold. + + // Is this '1 << Y'? + if (OldShiftOpcode == ISD::SHL && CC->isOne()) + return false; // Keep the 'bit test' pattern. + + // Will it be '1 << Y' after the transform? + if (XC && NewShiftOpcode == ISD::SHL && XC->isOne()) + return true; // Do form the 'bit test' pattern. + } + + // If 'X' is a constant and we transform, then we will immediately + // try to undo the fold, thus causing an endless combine loop. + // So by default, let's assume everyone prefers the fold + // iff 'X' is not a constant. + return !XC; + } + /// These two forms are equivalent: /// sub %y, (xor %x, -1) /// add (add %x, 1), %y @@ -4108,6 +4146,11 @@ DAGCombinerInfo &DCI, const SDLoc &DL) const; + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</>> Y) & C) ==/!= 0 + SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift( + EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL) const; + SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL, Index: llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ llvm/trunk/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2760,6 +2760,77 @@ return T2; } +// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</>> Y) & C) ==/!= 0 +SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift( + EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL) const { + assert(isConstOrConstSplat(N1C) && + isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() && + "Should be a comparison with 0."); + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Valid only for [in]equality comparisons."); + + unsigned NewShiftOpcode; + SDValue X, C, Y; + + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Look for '(C l>>/<< Y)'.
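+ // For example, in the i8 'signbit' tests from this patch: + //   (X & (128 l>> Y)) ==/!= 0  -->  ((X << Y) & 128) ==/!= 0 + // The 'and' mask becomes a plain constant that no longer depends on Y, + // which a target like AArch64 can then select as 'lsl' plus 'tst #0x80'.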
+ auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI, &DAG](SDValue V) { + // The shift should be one-use. + if (!V.hasOneUse()) + return false; + unsigned OldShiftOpcode = V.getOpcode(); + switch (OldShiftOpcode) { + case ISD::SHL: + NewShiftOpcode = ISD::SRL; + break; + case ISD::SRL: + NewShiftOpcode = ISD::SHL; + break; + default: + return false; // must be a logical shift. + } + // We should be shifting a constant. + // FIXME: best to use isConstantOrConstantVector(). + C = V.getOperand(0); + ConstantSDNode *CC = + isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + if (!CC) + return false; + Y = V.getOperand(1); + + ConstantSDNode *XC = + isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG); + }; + + // LHS of the comparison should be a one-use 'and'. + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) + return SDValue(); + + X = N0.getOperand(0); + SDValue Mask = N0.getOperand(1); + + // 'and' is commutative! + if (!Match(Mask)) { + std::swap(X, Mask); + if (!Match(Mask)) + return SDValue(); + } + + EVT VT = X.getValueType(); + + // Produce: + // ((X 'OppositeShiftOpcode' Y) & C) Cond 0 + SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y); + SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C); + SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond); + return T2; +} + /// Try to fold an equality comparison with a {add/sub/xor} binary operation as /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to /// handle the commuted versions of these patterns. @@ -3328,6 +3399,14 @@ } } + if (Cond == ISD::SETEQ || Cond == ISD::SETNE) { + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <</>> Y) & C) ==/!= 0 + if (C1.isNullValue()) + if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( + VT, N0, N1, Cond, DCI, dl)) + return CC; + } + // If we have "setcc X, C0", check to see if we can shrink the immediate // by changing cc. // TODO: Support this for vectors after legalize ops. Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.h @@ -488,6 +488,11 @@ return VT.getSizeInBits() >= 64; // vector 'bic' } + bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const override; + bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const override { if (DAG.getMachineFunction().getFunction().hasMinSize()) return false; Index: llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/trunk/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -12042,6 +12042,19 @@ return Mask->getValue().isPowerOf2(); } +bool AArch64TargetLowering:: + shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const { + // Does the baseline recommend not performing the fold by default?
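+ // (The base TargetLowering implementation rejects the fold when 'X' is a + // constant, to avoid the combine loop described in the header, and keeps + // or forms the '1 << Y' bit-test pattern whenever hasBitTest() holds.)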
+ if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG)) + return false; + // Otherwise, if this is a vector shift, prefer 'shl'. + return X.getValueType().isScalarInteger() || NewShiftOpcode == ISD::SHL; +} + void AArch64TargetLowering::initializeSplitCSR(MachineBasicBlock *Entry) const { // Update IsSplitCSR in AArch64FunctionInfo. AArch64FunctionInfo *AFI = Entry->getParent()->getInfo(); Index: llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h =================================================================== --- llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h +++ llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.h @@ -127,6 +127,8 @@ bool isCheapToSpeculateCtlz() const override { return true; } bool isCtlzFast() const override { return true; } + bool hasBitTest(SDValue X, SDValue Y) const override; + bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; /// Return true if an FMA operation is faster than a pair of mul and add Index: llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp +++ llvm/trunk/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1817,6 +1817,10 @@ return false; } +bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const { + return X.getValueType().isScalarInteger(); // 'tstbit' +} + bool HexagonTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const { return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2)); } Index: llvm/trunk/lib/Target/X86/X86ISelLowering.h =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.h +++ llvm/trunk/lib/Target/X86/X86ISelLowering.h @@ -840,6 +840,13 @@ bool hasAndNot(SDValue Y) const override; + bool hasBitTest(SDValue X, SDValue Y) const override; + + bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; Index: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp +++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp @@ -5022,6 +5022,33 @@ return Subtarget.hasSSE2(); } +bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const { + return X.getValueType().isScalarInteger(); // 'bt' +} + +bool X86TargetLowering:: + shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode, + SelectionDAG &DAG) const { + // Does the baseline recommend not performing the fold by default? + if (!TargetLowering::shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode, DAG)) + return false; + // For scalars this transform is always beneficial. + if (X.getValueType().isScalarInteger()) + return true; + // If all the shift amounts are identical, then the transform is beneficial + // even with rudimentary SSE2 shifts. + if (DAG.isSplatValue(Y, /*AllowUndefs=*/true)) + return true; + // If we have AVX2 with its powerful shift operations, then it's also good.
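+ // (AVX2 adds per-element variable vector shifts such as vpsllvd/vpsrlvd, + // so the hoisted shift stays cheap even when 'Y' is not a splat value.)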
+ if (Subtarget.hasAVX2()) + return true; + // Pre-AVX2 vector codegen for this pattern is best for variant with 'shl'. + return NewShiftOpcode == ISD::SHL; +} + bool X86TargetLowering::shouldFoldConstantShiftPairToMask( const SDNode *N, CombineLevel Level) const { assert(((N->getOpcode() == ISD::SHL && Index: llvm/trunk/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ llvm/trunk/test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -15,11 +15,9 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x80 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i8 128, %y @@ -31,11 +29,9 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i8 1, %y @@ -47,11 +43,9 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #24 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x18 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i8 24, %y @@ -65,11 +59,9 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: scalar_i16_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xffff +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x8000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i16 32768, %y @@ -81,11 +73,9 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: scalar_i16_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xffff +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i16 1, %y @@ -97,11 +87,9 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: scalar_i16_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4080 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xffff +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0xff0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i16 4080, %y @@ -115,9 +103,8 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: tst w8, w0 +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x80000000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 
= lshr i32 2147483648, %y @@ -129,9 +116,8 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: tst w8, w0 +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i32 1, %y @@ -143,9 +129,8 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16776960 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: tst w8, w0 +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0xffff00 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i32 16776960, %y @@ -159,9 +144,8 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scalar_i64_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 -; CHECK-NEXT: lsr x8, x8, x1 -; CHECK-NEXT: tst x8, x0 +; CHECK-NEXT: lsl x8, x0, x1 +; CHECK-NEXT: tst x8, #0x8000000000000000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i64 9223372036854775808, %y @@ -173,9 +157,8 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scalar_i64_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsr x8, x8, x1 -; CHECK-NEXT: tst x8, x0 +; CHECK-NEXT: lsl x8, x0, x1 +; CHECK-NEXT: tst x8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i64 1, %y @@ -187,9 +170,8 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scalar_i64_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #281474976645120 -; CHECK-NEXT: lsr x8, x8, x1 -; CHECK-NEXT: tst x8, x0 +; CHECK-NEXT: lsl x8, x0, x1 +; CHECK-NEXT: tst x8, #0xffffffff0000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i64 281474976645120, %y @@ -205,10 +187,9 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_splat_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: neg v1.4s, v1.4s ; CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -238,10 +219,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: neg v1.4s, v1.4s ; CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -288,12 +268,9 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_signbit_ne: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: ubfx w0, w8, #7, #1 ; CHECK-NEXT: ret %t0 = lshr i8 128, %y %t1 = and i8 %t0, %x Index: llvm/trunk/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll =================================================================== --- llvm/trunk/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ 
llvm/trunk/test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -15,11 +15,10 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-128 +; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: tst w8, #0x80 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i8 128, %y @@ -31,11 +30,10 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i8 1, %y @@ -47,11 +45,10 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #24 +; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: tst w8, #0x18 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i8 24, %y @@ -65,11 +62,10 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: scalar_i16_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-32768 +; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xffff +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: tst w8, #0x8000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i16 32768, %y @@ -81,11 +77,10 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: scalar_i16_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xffff +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i16 1, %y @@ -97,11 +92,10 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: scalar_i16_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4080 +; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xffff +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: tst w8, #0xff0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i16 4080, %y @@ -115,9 +109,8 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: tst w8, w0 +; CHECK-NEXT: lsr w8, w0, w1 +; CHECK-NEXT: tst w8, #0x80000000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i32 2147483648, %y @@ -129,9 +122,8 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: tst w8, w0 +; CHECK-NEXT: lsr w8, w0, w1 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: cset w0, eq ; 
CHECK-NEXT: ret %t0 = shl i32 1, %y @@ -143,9 +135,8 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16776960 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: tst w8, w0 +; CHECK-NEXT: lsr w8, w0, w1 +; CHECK-NEXT: tst w8, #0xffff00 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i32 16776960, %y @@ -159,9 +150,8 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scalar_i64_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 -; CHECK-NEXT: lsl x8, x8, x1 -; CHECK-NEXT: tst x8, x0 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: tst x8, #0x8000000000000000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i64 9223372036854775808, %y @@ -173,9 +163,8 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scalar_i64_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsl x8, x8, x1 -; CHECK-NEXT: tst x8, x0 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: tst x8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i64 1, %y @@ -187,9 +176,8 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scalar_i64_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #281474976645120 -; CHECK-NEXT: lsl x8, x8, x1 -; CHECK-NEXT: tst x8, x0 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: tst x8, #0xffffffff0000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i64 281474976645120, %y @@ -283,12 +271,10 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_signbit_ne: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-128 +; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: ubfx w0, w8, #7, #1 ; CHECK-NEXT: ret %t0 = shl i8 128, %y %t1 = and i8 %t0, %x Index: llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll =================================================================== --- llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll +++ llvm/trunk/test/CodeGen/AMDGPU/commute-shifts.ll @@ -16,10 +16,10 @@ ; SI-NEXT: s_mov_b32 s7, s0 ; SI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm ; SI-NEXT: v_and_b32_e32 v0, 7, v0 -; SI-NEXT: v_lshl_b32_e32 v0, 1, v0 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_and_b32_e32 v0, v2, v0 -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; SI-NEXT: v_lshrrev_b32_e32 v0, v0, v2 +; SI-NEXT: v_and_b32_e32 v0, 1, v0 +; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; SI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, v0, v0 ; SI-NEXT: ; return to shader part epilog @@ -37,10 +37,10 @@ ; VI-NEXT: s_mov_b32 s7, s0 ; VI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm ; VI-NEXT: v_and_b32_e32 v0, 7, v0 -; VI-NEXT: v_lshlrev_b32_e64 v0, v0, 1 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_and_b32_e32 v0, v2, v0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; VI-NEXT: v_lshrrev_b32_e32 v0, v0, v2 +; VI-NEXT: v_and_b32_e32 v0, 1, v0 +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; VI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-NEXT: v_cvt_pkrtz_f16_f32 v0, v0, v0 ; VI-NEXT: ; return to shader part epilog Index: llvm/trunk/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ 
llvm/trunk/test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -21,20 +21,18 @@ ; ARM-LABEL: scalar_i8_signbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 -; ARM-NEXT: mov r2, #128 -; ARM-NEXT: and r0, r0, r2, lsr r1 +; ARM-NEXT: lsl r0, r0, r1 +; ARM-NEXT: mov r1, #1 ; ARM-NEXT: uxtb r0, r0 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: eor r0, r1, r0, lsr #7 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_signbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #128 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r1, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #128 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr @@ -42,12 +40,10 @@ ; THUMB78-LABEL: scalar_i8_signbit_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #128 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: movs r1, #1 ; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: eor.w r0, r1, r0, lsr #7 ; THUMB78-NEXT: bx lr %t0 = lshr i8 128, %y %t1 = and i8 %t0, %x @@ -60,19 +56,15 @@ ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 ; ARM-NEXT: mov r2, #1 -; ARM-NEXT: and r0, r0, r2, lsr r1 -; ARM-NEXT: uxtb r0, r0 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: bic r0, r2, r0, lsl r1 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_lowestbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r1, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr @@ -80,12 +72,9 @@ ; THUMB78-LABEL: scalar_i8_lowestbit_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #1 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: movs r1, #1 +; THUMB78-NEXT: bic.w r0, r1, r0 ; THUMB78-NEXT: bx lr %t0 = lshr i8 1, %y %t1 = and i8 %t0, %x @@ -98,8 +87,7 @@ ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 ; ARM-NEXT: mov r2, #24 -; ARM-NEXT: and r0, r0, r2, lsr r1 -; ARM-NEXT: uxtb r0, r0 +; ARM-NEXT: and r0, r2, r0, lsl r1 ; ARM-NEXT: clz r0, r0 ; ARM-NEXT: lsr r0, r0, #5 ; ARM-NEXT: bx lr @@ -107,10 +95,9 @@ ; THUMB6-LABEL: scalar_i8_bitsinmiddle_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #24 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r1, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #24 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr @@ -118,10 +105,8 @@ ; THUMB78-LABEL: scalar_i8_bitsinmiddle_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #24 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: and r0, r0, #24 ; THUMB78-NEXT: clz r0, r0 ; THUMB78-NEXT: lsrs r0, r0, #5 ; THUMB78-NEXT: bx lr @@ -137,21 +122,19 @@ ; ARM-LABEL: scalar_i16_signbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxth r1, r1 -; ARM-NEXT: mov r2, #32768 -; ARM-NEXT: and r0, r0, r2, lsr r1 +; ARM-NEXT: lsl r0, r0, r1 +; ARM-NEXT: mov r1, #1 ; ARM-NEXT: uxth r0, r0 -; ARM-NEXT: 
clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: eor r0, r1, r0, lsr #15 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i16_signbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxth r1, r1 -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsls r2, r2, #15 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxth r1, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: lsls r1, r1, #15 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr @@ -159,12 +142,10 @@ ; THUMB78-LABEL: scalar_i16_signbit_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxth r1, r1 -; THUMB78-NEXT: mov.w r2, #32768 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: movs r1, #1 ; THUMB78-NEXT: uxth r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: eor.w r0, r1, r0, lsr #15 ; THUMB78-NEXT: bx lr %t0 = lshr i16 32768, %y %t1 = and i16 %t0, %x @@ -177,19 +158,15 @@ ; ARM: @ %bb.0: ; ARM-NEXT: uxth r1, r1 ; ARM-NEXT: mov r2, #1 -; ARM-NEXT: and r0, r0, r2, lsr r1 -; ARM-NEXT: uxth r0, r0 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: bic r0, r2, r0, lsl r1 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i16_lowestbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxth r1, r1 -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxth r1, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr @@ -197,12 +174,9 @@ ; THUMB78-LABEL: scalar_i16_lowestbit_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxth r1, r1 -; THUMB78-NEXT: movs r2, #1 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxth r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: movs r1, #1 +; THUMB78-NEXT: bic.w r0, r1, r0 ; THUMB78-NEXT: bx lr %t0 = lshr i16 1, %y %t1 = and i16 %t0, %x @@ -215,8 +189,7 @@ ; ARM: @ %bb.0: ; ARM-NEXT: uxth r1, r1 ; ARM-NEXT: mov r2, #4080 -; ARM-NEXT: and r0, r0, r2, lsr r1 -; ARM-NEXT: uxth r0, r0 +; ARM-NEXT: and r0, r2, r0, lsl r1 ; ARM-NEXT: clz r0, r0 ; ARM-NEXT: lsr r0, r0, #5 ; ARM-NEXT: bx lr @@ -224,11 +197,10 @@ ; THUMB6-LABEL: scalar_i16_bitsinmiddle_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxth r1, r1 -; THUMB6-NEXT: movs r2, #255 -; THUMB6-NEXT: lsls r2, r2, #4 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxth r1, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #255 +; THUMB6-NEXT: lsls r1, r1, #4 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr @@ -236,10 +208,8 @@ ; THUMB78-LABEL: scalar_i16_bitsinmiddle_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxth r1, r1 -; THUMB78-NEXT: mov.w r2, #4080 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxth r0, r0 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: and r0, r0, #4080 ; THUMB78-NEXT: clz r0, r0 ; THUMB78-NEXT: lsrs r0, r0, #5 ; THUMB78-NEXT: bx lr @@ -254,29 +224,25 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { ; ARM-LABEL: scalar_i32_signbit_eq: ; ARM: @ %bb.0: -; ARM-NEXT: mov r2, #-2147483648 -; ARM-NEXT: and r0, r0, r2, lsr r1 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: mvn r0, r0, lsl r1 +; ARM-NEXT: lsr r0, r0, #31 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i32_signbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: 
movs r2, #1 -; THUMB6-NEXT: lsls r2, r2, #31 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: rsbs r0, r2, #0 -; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: lsls r1, r1, #31 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 +; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; ; THUMB78-LABEL: scalar_i32_signbit_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: mov.w r2, #-2147483648 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: mvns r0, r0 +; THUMB78-NEXT: lsrs r0, r0, #31 ; THUMB78-NEXT: bx lr %t0 = lshr i32 2147483648, %y %t1 = and i32 %t0, %x @@ -288,27 +254,23 @@ ; ARM-LABEL: scalar_i32_lowestbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: mov r2, #1 -; ARM-NEXT: and r0, r0, r2, lsr r1 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: bic r0, r2, r0, lsl r1 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i32_lowestbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: rsbs r0, r2, #0 -; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 +; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; ; THUMB78-LABEL: scalar_i32_lowestbit_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: movs r2, #1 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: movs r1, #1 +; THUMB78-NEXT: bic.w r0, r1, r0 ; THUMB78-NEXT: bx lr %t0 = lshr i32 1, %y %t1 = and i32 %t0, %x @@ -321,7 +283,7 @@ ; ARM6: @ %bb.0: ; ARM6-NEXT: mov r2, #65280 ; ARM6-NEXT: orr r2, r2, #16711680 -; ARM6-NEXT: and r0, r0, r2, lsr r1 +; ARM6-NEXT: and r0, r2, r0, lsl r1 ; ARM6-NEXT: clz r0, r0 ; ARM6-NEXT: lsr r0, r0, #5 ; ARM6-NEXT: bx lr @@ -330,18 +292,18 @@ ; ARM78: @ %bb.0: ; ARM78-NEXT: movw r2, #65280 ; ARM78-NEXT: movt r2, #255 -; ARM78-NEXT: and r0, r0, r2, lsr r1 +; ARM78-NEXT: and r0, r2, r0, lsl r1 ; ARM78-NEXT: clz r0, r0 ; ARM78-NEXT: lsr r0, r0, #5 ; ARM78-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i32_bitsinmiddle_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: ldr r2, .LCPI8_0 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: rsbs r0, r2, #0 -; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: ldr r1, .LCPI8_0 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 +; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; THUMB6-NEXT: .p2align 2 ; THUMB6-NEXT: @ %bb.1: @@ -350,9 +312,9 @@ ; ; THUMB78-LABEL: scalar_i32_bitsinmiddle_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: movw r2, #65280 -; THUMB78-NEXT: movt r2, #255 -; THUMB78-NEXT: lsr.w r1, r2, r1 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: movw r1, #65280 +; THUMB78-NEXT: movt r1, #255 ; THUMB78-NEXT: ands r0, r1 ; THUMB78-NEXT: clz r0, r0 ; THUMB78-NEXT: lsrs r0, r0, #5 @@ -366,95 +328,54 @@ ; i64 scalar define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { -; ARM6-LABEL: scalar_i64_signbit_eq: -; ARM6: @ %bb.0: -; ARM6-NEXT: push {r11, lr} -; ARM6-NEXT: mov r12, #-2147483648 -; ARM6-NEXT: subs lr, r2, #32 -; ARM6-NEXT: lsr r3, r12, r2 -; ARM6-NEXT: rsb r2, r2, #32 -; ARM6-NEXT: movpl r3, #0 -; ARM6-NEXT: and r1, r3, r1 -; ARM6-NEXT: lsl r2, r12, r2 -; ARM6-NEXT: lsrpl r2, r12, lr -; ARM6-NEXT: and r0, r2, r0 -; ARM6-NEXT: orr r0, r0, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr 
r0, r0, #5 -; ARM6-NEXT: pop {r11, pc} -; -; ARM78-LABEL: scalar_i64_signbit_eq: -; ARM78: @ %bb.0: -; ARM78-NEXT: push {r11, lr} -; ARM78-NEXT: mov r12, #-2147483648 -; ARM78-NEXT: subs lr, r2, #32 -; ARM78-NEXT: lsr r3, r12, r2 -; ARM78-NEXT: rsb r2, r2, #32 -; ARM78-NEXT: movwpl r3, #0 -; ARM78-NEXT: and r1, r3, r1 -; ARM78-NEXT: lsl r2, r12, r2 -; ARM78-NEXT: lsrpl r2, r12, lr -; ARM78-NEXT: and r0, r2, r0 -; ARM78-NEXT: orr r0, r0, r1 -; ARM78-NEXT: clz r0, r0 -; ARM78-NEXT: lsr r0, r0, #5 -; ARM78-NEXT: pop {r11, pc} +; ARM-LABEL: scalar_i64_signbit_eq: +; ARM: @ %bb.0: +; ARM-NEXT: rsb r3, r2, #32 +; ARM-NEXT: lsr r3, r0, r3 +; ARM-NEXT: orr r1, r3, r1, lsl r2 +; ARM-NEXT: subs r2, r2, #32 +; ARM-NEXT: lslpl r1, r0, r2 +; ARM-NEXT: mvn r0, r1 +; ARM-NEXT: lsr r0, r0, #31 +; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i64_signbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: mov r4, r1 -; THUMB6-NEXT: mov r5, r0 +; THUMB6-NEXT: push {r7, lr} +; THUMB6-NEXT: bl __ashldi3 ; THUMB6-NEXT: movs r0, #1 -; THUMB6-NEXT: lsls r1, r0, #31 -; THUMB6-NEXT: movs r0, #0 -; THUMB6-NEXT: bl __lshrdi3 -; THUMB6-NEXT: ands r1, r4 -; THUMB6-NEXT: ands r0, r5 -; THUMB6-NEXT: orrs r0, r1 -; THUMB6-NEXT: rsbs r1, r0, #0 -; THUMB6-NEXT: adcs r0, r1 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: lsls r2, r0, #31 +; THUMB6-NEXT: ands r2, r1 +; THUMB6-NEXT: rsbs r0, r2, #0 +; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: pop {r7, pc} ; ; THUMB7-LABEL: scalar_i64_signbit_eq: ; THUMB7: @ %bb.0: -; THUMB7-NEXT: push {r7, lr} ; THUMB7-NEXT: rsb.w r3, r2, #32 -; THUMB7-NEXT: mov.w r12, #-2147483648 -; THUMB7-NEXT: subs.w lr, r2, #32 -; THUMB7-NEXT: lsr.w r2, r12, r2 -; THUMB7-NEXT: lsl.w r3, r12, r3 +; THUMB7-NEXT: lsls r1, r2 +; THUMB7-NEXT: subs r2, #32 +; THUMB7-NEXT: lsr.w r3, r0, r3 +; THUMB7-NEXT: orr.w r1, r1, r3 ; THUMB7-NEXT: it pl -; THUMB7-NEXT: lsrpl.w r3, r12, lr -; THUMB7-NEXT: it pl -; THUMB7-NEXT: movpl r2, #0 -; THUMB7-NEXT: ands r0, r3 -; THUMB7-NEXT: ands r1, r2 -; THUMB7-NEXT: orrs r0, r1 -; THUMB7-NEXT: clz r0, r0 -; THUMB7-NEXT: lsrs r0, r0, #5 -; THUMB7-NEXT: pop {r7, pc} +; THUMB7-NEXT: lslpl.w r1, r0, r2 +; THUMB7-NEXT: mvns r0, r1 +; THUMB7-NEXT: lsrs r0, r0, #31 +; THUMB7-NEXT: bx lr ; ; THUMB8-LABEL: scalar_i64_signbit_eq: ; THUMB8: @ %bb.0: -; THUMB8-NEXT: .save {r7, lr} -; THUMB8-NEXT: push {r7, lr} -; THUMB8-NEXT: subs.w r3, r2, #32 -; THUMB8-NEXT: mov.w r12, #-2147483648 -; THUMB8-NEXT: lsr.w lr, r12, r3 ; THUMB8-NEXT: rsb.w r3, r2, #32 -; THUMB8-NEXT: lsr.w r2, r12, r2 -; THUMB8-NEXT: lsl.w r3, r12, r3 -; THUMB8-NEXT: it pl -; THUMB8-NEXT: movpl r3, lr -; THUMB8-NEXT: it pl -; THUMB8-NEXT: movpl r2, #0 -; THUMB8-NEXT: ands r0, r3 -; THUMB8-NEXT: ands r1, r2 -; THUMB8-NEXT: orrs r0, r1 -; THUMB8-NEXT: clz r0, r0 -; THUMB8-NEXT: lsrs r0, r0, #5 -; THUMB8-NEXT: pop {r7, pc} +; THUMB8-NEXT: lsls r1, r2 +; THUMB8-NEXT: lsr.w r3, r0, r3 +; THUMB8-NEXT: orrs r1, r3 +; THUMB8-NEXT: subs r2, #32 +; THUMB8-NEXT: lsl.w r0, r0, r2 +; THUMB8-NEXT: it mi +; THUMB8-NEXT: movmi r0, r1 +; THUMB8-NEXT: mvns r0, r0 +; THUMB8-NEXT: lsrs r0, r0, #31 +; THUMB8-NEXT: bx lr %t0 = lshr i64 9223372036854775808, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -464,51 +385,40 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; ARM6-LABEL: scalar_i64_lowestbit_eq: ; ARM6: @ %bb.0: +; ARM6-NEXT: subs r1, r2, #32 +; ARM6-NEXT: lsl r0, r0, r2 +; ARM6-NEXT: movpl r0, #0 ; ARM6-NEXT: mov r1, #1 -; ARM6-NEXT: lsr r1, r1, r2 -; ARM6-NEXT: subs r2, r2, #32 -; 
ARM6-NEXT: movpl r1, #0 -; ARM6-NEXT: and r0, r1, r0 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r0, r0, #5 +; ARM6-NEXT: bic r0, r1, r0 ; ARM6-NEXT: bx lr ; ; ARM78-LABEL: scalar_i64_lowestbit_eq: ; ARM78: @ %bb.0: +; ARM78-NEXT: subs r1, r2, #32 +; ARM78-NEXT: lsl r0, r0, r2 +; ARM78-NEXT: movwpl r0, #0 ; ARM78-NEXT: mov r1, #1 -; ARM78-NEXT: lsr r1, r1, r2 -; ARM78-NEXT: subs r2, r2, #32 -; ARM78-NEXT: movwpl r1, #0 -; ARM78-NEXT: and r0, r1, r0 -; ARM78-NEXT: clz r0, r0 -; ARM78-NEXT: lsr r0, r0, #5 +; ARM78-NEXT: bic r0, r1, r0 ; ARM78-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i64_lowestbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: mov r4, r1 -; THUMB6-NEXT: mov r5, r0 -; THUMB6-NEXT: movs r0, #1 -; THUMB6-NEXT: movs r1, #0 -; THUMB6-NEXT: bl __lshrdi3 -; THUMB6-NEXT: ands r1, r4 -; THUMB6-NEXT: ands r0, r5 -; THUMB6-NEXT: orrs r0, r1 -; THUMB6-NEXT: rsbs r1, r0, #0 +; THUMB6-NEXT: push {r7, lr} +; THUMB6-NEXT: bl __ashldi3 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r7, pc} ; ; THUMB78-LABEL: scalar_i64_lowestbit_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: movs r1, #1 -; THUMB78-NEXT: lsrs r1, r2 -; THUMB78-NEXT: subs r2, #32 +; THUMB78-NEXT: lsls r0, r2 +; THUMB78-NEXT: subs.w r1, r2, #32 ; THUMB78-NEXT: it pl -; THUMB78-NEXT: movpl r1, #0 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: movpl r0, #0 +; THUMB78-NEXT: movs r1, #1 +; THUMB78-NEXT: bic.w r0, r1, r0 ; THUMB78-NEXT: bx lr %t0 = lshr i64 1, %y %t1 = and i64 %t0, %x @@ -519,115 +429,82 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; ARM6-LABEL: scalar_i64_bitsinmiddle_eq: ; ARM6: @ %bb.0: -; ARM6-NEXT: push {r11, lr} -; ARM6-NEXT: mov r12, #255 -; ARM6-NEXT: subs lr, r2, #32 -; ARM6-NEXT: orr r12, r12, #65280 -; ARM6-NEXT: lsr r3, r12, r2 -; ARM6-NEXT: movpl r3, #0 -; ARM6-NEXT: and r1, r3, r1 -; ARM6-NEXT: mov r3, #16711680 -; ARM6-NEXT: cmp lr, #0 -; ARM6-NEXT: orr r3, r3, #-16777216 -; ARM6-NEXT: lsr r3, r3, r2 -; ARM6-NEXT: rsb r2, r2, #32 -; ARM6-NEXT: orr r2, r3, r12, lsl r2 -; ARM6-NEXT: lsrpl r2, r12, lr -; ARM6-NEXT: and r0, r2, r0 -; ARM6-NEXT: orr r0, r0, r1 +; ARM6-NEXT: rsb r3, r2, #32 +; ARM6-NEXT: lsr r3, r0, r3 +; ARM6-NEXT: orr r1, r3, r1, lsl r2 +; ARM6-NEXT: subs r3, r2, #32 +; ARM6-NEXT: lslpl r1, r0, r3 +; ARM6-NEXT: lsl r0, r0, r2 +; ARM6-NEXT: movpl r0, #0 +; ARM6-NEXT: pkhbt r0, r1, r0 ; ARM6-NEXT: clz r0, r0 ; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: pop {r11, pc} +; ARM6-NEXT: bx lr ; ; ARM78-LABEL: scalar_i64_bitsinmiddle_eq: ; ARM78: @ %bb.0: -; ARM78-NEXT: push {r11, lr} -; ARM78-NEXT: movw r12, #65535 -; ARM78-NEXT: subs lr, r2, #32 -; ARM78-NEXT: lsr r3, r12, r2 -; ARM78-NEXT: movwpl r3, #0 -; ARM78-NEXT: and r1, r3, r1 -; ARM78-NEXT: movw r3, #0 -; ARM78-NEXT: cmp lr, #0 -; ARM78-NEXT: movt r3, #65535 -; ARM78-NEXT: lsr r3, r3, r2 -; ARM78-NEXT: rsb r2, r2, #32 -; ARM78-NEXT: orr r2, r3, r12, lsl r2 -; ARM78-NEXT: lsrpl r2, r12, lr -; ARM78-NEXT: and r0, r2, r0 -; ARM78-NEXT: orr r0, r0, r1 +; ARM78-NEXT: rsb r3, r2, #32 +; ARM78-NEXT: lsr r3, r0, r3 +; ARM78-NEXT: orr r1, r3, r1, lsl r2 +; ARM78-NEXT: subs r3, r2, #32 +; ARM78-NEXT: lslpl r1, r0, r3 +; ARM78-NEXT: lsl r0, r0, r2 +; ARM78-NEXT: movwpl r0, #0 +; ARM78-NEXT: pkhbt r0, r1, r0 ; ARM78-NEXT: clz r0, r0 ; ARM78-NEXT: lsr r0, r0, #5 -; ARM78-NEXT: pop {r11, pc} +; ARM78-NEXT: bx lr ; ; THUMB6-LABEL: 
scalar_i64_bitsinmiddle_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: mov r4, r1 -; THUMB6-NEXT: mov r5, r0 -; THUMB6-NEXT: ldr r0, .LCPI11_0 -; THUMB6-NEXT: ldr r1, .LCPI11_1 -; THUMB6-NEXT: bl __lshrdi3 -; THUMB6-NEXT: ands r1, r4 -; THUMB6-NEXT: ands r0, r5 -; THUMB6-NEXT: orrs r0, r1 -; THUMB6-NEXT: rsbs r1, r0, #0 +; THUMB6-NEXT: push {r7, lr} +; THUMB6-NEXT: bl __ashldi3 +; THUMB6-NEXT: ldr r2, .LCPI11_0 +; THUMB6-NEXT: ands r2, r0 +; THUMB6-NEXT: uxth r0, r1 +; THUMB6-NEXT: adds r1, r2, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r7, pc} ; THUMB6-NEXT: .p2align 2 ; THUMB6-NEXT: @ %bb.1: ; THUMB6-NEXT: .LCPI11_0: ; THUMB6-NEXT: .long 4294901760 @ 0xffff0000 -; THUMB6-NEXT: .LCPI11_1: -; THUMB6-NEXT: .long 65535 @ 0xffff ; ; THUMB7-LABEL: scalar_i64_bitsinmiddle_eq: ; THUMB7: @ %bb.0: -; THUMB7-NEXT: push {r7, lr} -; THUMB7-NEXT: movs r3, #0 -; THUMB7-NEXT: movw lr, #65535 -; THUMB7-NEXT: movt r3, #65535 -; THUMB7-NEXT: lsr.w r12, r3, r2 ; THUMB7-NEXT: rsb.w r3, r2, #32 -; THUMB7-NEXT: lsl.w r3, lr, r3 -; THUMB7-NEXT: orr.w r12, r12, r3 +; THUMB7-NEXT: lsls r1, r2 +; THUMB7-NEXT: lsr.w r3, r0, r3 +; THUMB7-NEXT: orrs r1, r3 ; THUMB7-NEXT: subs.w r3, r2, #32 -; THUMB7-NEXT: lsr.w r2, lr, r2 ; THUMB7-NEXT: it pl -; THUMB7-NEXT: lsrpl.w r12, lr, r3 +; THUMB7-NEXT: lslpl.w r1, r0, r3 +; THUMB7-NEXT: lsl.w r0, r0, r2 ; THUMB7-NEXT: it pl -; THUMB7-NEXT: movpl r2, #0 -; THUMB7-NEXT: and.w r0, r0, r12 -; THUMB7-NEXT: ands r1, r2 -; THUMB7-NEXT: orrs r0, r1 +; THUMB7-NEXT: movpl r0, #0 +; THUMB7-NEXT: pkhbt r0, r1, r0 ; THUMB7-NEXT: clz r0, r0 ; THUMB7-NEXT: lsrs r0, r0, #5 -; THUMB7-NEXT: pop {r7, pc} +; THUMB7-NEXT: bx lr ; ; THUMB8-LABEL: scalar_i64_bitsinmiddle_eq: ; THUMB8: @ %bb.0: -; THUMB8-NEXT: .save {r7, lr} -; THUMB8-NEXT: push {r7, lr} -; THUMB8-NEXT: movs r3, #0 -; THUMB8-NEXT: movw lr, #65535 -; THUMB8-NEXT: movt r3, #65535 -; THUMB8-NEXT: lsr.w r12, r3, r2 ; THUMB8-NEXT: rsb.w r3, r2, #32 -; THUMB8-NEXT: lsl.w r3, lr, r3 -; THUMB8-NEXT: orr.w r12, r12, r3 +; THUMB8-NEXT: lsls r1, r2 +; THUMB8-NEXT: lsr.w r3, r0, r3 +; THUMB8-NEXT: orrs r1, r3 ; THUMB8-NEXT: subs.w r3, r2, #32 -; THUMB8-NEXT: lsr.w r2, lr, r2 -; THUMB8-NEXT: lsr.w r3, lr, r3 +; THUMB8-NEXT: lsl.w r3, r0, r3 +; THUMB8-NEXT: lsl.w r0, r0, r2 ; THUMB8-NEXT: it mi -; THUMB8-NEXT: movmi r3, r12 +; THUMB8-NEXT: movmi r3, r1 ; THUMB8-NEXT: it pl -; THUMB8-NEXT: movpl r2, #0 -; THUMB8-NEXT: ands r0, r3 -; THUMB8-NEXT: ands r1, r2 -; THUMB8-NEXT: orrs r0, r1 +; THUMB8-NEXT: movpl r0, #0 +; THUMB8-NEXT: pkhbt r0, r3, r0 ; THUMB8-NEXT: clz r0, r0 ; THUMB8-NEXT: lsrs r0, r0, #5 -; THUMB8-NEXT: pop {r7, pc} +; THUMB8-NEXT: bx lr %t0 = lshr i64 281474976645120, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -644,33 +521,24 @@ ; ARM6-NEXT: push {r11, lr} ; ARM6-NEXT: ldr r12, [sp, #8] ; ARM6-NEXT: mov lr, #1 -; ARM6-NEXT: and r0, r0, lr, lsr r12 +; ARM6-NEXT: bic r0, lr, r0, lsl r12 ; ARM6-NEXT: ldr r12, [sp, #12] -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r1, r1, lr, lsr r12 +; ARM6-NEXT: bic r1, lr, r1, lsl r12 ; ARM6-NEXT: ldr r12, [sp, #16] -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: and r2, r2, lr, lsr r12 +; ARM6-NEXT: bic r2, lr, r2, lsl r12 ; ARM6-NEXT: ldr r12, [sp, #20] -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: and r3, r3, lr, lsr r12 -; ARM6-NEXT: lsr r2, r2, #5 -; ARM6-NEXT: clz r3, r3 -; ARM6-NEXT: lsr r3, r3, #5 +; ARM6-NEXT: bic r3, lr, r3, lsl r12 ; 
ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_splat_eq: ; ARM78: @ %bb.0: +; ARM78-NEXT: vmov d17, r2, r3 ; ARM78-NEXT: mov r12, sp -; ARM78-NEXT: vld1.64 {d16, d17}, [r12] -; ARM78-NEXT: vmov.i32 q9, #0x1 -; ARM78-NEXT: vneg.s32 q8, q8 -; ARM78-NEXT: vshl.u32 q8, q9, q8 -; ARM78-NEXT: vmov d19, r2, r3 -; ARM78-NEXT: vmov d18, r0, r1 -; ARM78-NEXT: vtst.32 q8, q8, q9 +; ARM78-NEXT: vld1.64 {d18, d19}, [r12] +; ARM78-NEXT: vmov d16, r0, r1 +; ARM78-NEXT: vmov.i32 q10, #0x1 +; ARM78-NEXT: vshl.u32 q8, q8, q9 +; ARM78-NEXT: vtst.32 q8, q8, q10 ; ARM78-NEXT: vmvn q8, q8 ; ARM78-NEXT: vmovn.i32 d16, q8 ; ARM78-NEXT: vmov r0, r1, d16 @@ -678,43 +546,39 @@ ; ; THUMB6-LABEL: vec_4xi32_splat_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r6, lr} -; THUMB6-NEXT: ldr r5, [sp, #16] +; THUMB6-NEXT: push {r4, r5, r7, lr} +; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: lsls r0, r4 ; THUMB6-NEXT: movs r4, #1 -; THUMB6-NEXT: mov r6, r4 -; THUMB6-NEXT: lsrs r6, r5 -; THUMB6-NEXT: ands r6, r0 -; THUMB6-NEXT: rsbs r0, r6, #0 -; THUMB6-NEXT: adcs r0, r6 +; THUMB6-NEXT: ands r0, r4 +; THUMB6-NEXT: rsbs r5, r0, #0 +; THUMB6-NEXT: adcs r0, r5 ; THUMB6-NEXT: ldr r5, [sp, #20] -; THUMB6-NEXT: mov r6, r4 -; THUMB6-NEXT: lsrs r6, r5 -; THUMB6-NEXT: ands r6, r1 -; THUMB6-NEXT: rsbs r1, r6, #0 -; THUMB6-NEXT: adcs r1, r6 +; THUMB6-NEXT: lsls r1, r5 +; THUMB6-NEXT: ands r1, r4 +; THUMB6-NEXT: rsbs r5, r1, #0 +; THUMB6-NEXT: adcs r1, r5 ; THUMB6-NEXT: ldr r5, [sp, #24] -; THUMB6-NEXT: mov r6, r4 -; THUMB6-NEXT: lsrs r6, r5 -; THUMB6-NEXT: ands r6, r2 -; THUMB6-NEXT: rsbs r2, r6, #0 -; THUMB6-NEXT: adcs r2, r6 +; THUMB6-NEXT: lsls r2, r5 +; THUMB6-NEXT: ands r2, r4 +; THUMB6-NEXT: rsbs r5, r2, #0 +; THUMB6-NEXT: adcs r2, r5 ; THUMB6-NEXT: ldr r5, [sp, #28] -; THUMB6-NEXT: lsrs r4, r5 -; THUMB6-NEXT: ands r4, r3 -; THUMB6-NEXT: rsbs r3, r4, #0 +; THUMB6-NEXT: lsls r3, r5 +; THUMB6-NEXT: ands r3, r4 +; THUMB6-NEXT: rsbs r4, r3, #0 ; THUMB6-NEXT: adcs r3, r4 -; THUMB6-NEXT: pop {r4, r5, r6, pc} +; THUMB6-NEXT: pop {r4, r5, r7, pc} ; ; THUMB78-LABEL: vec_4xi32_splat_eq: ; THUMB78: @ %bb.0: +; THUMB78-NEXT: vmov d17, r2, r3 ; THUMB78-NEXT: mov r12, sp -; THUMB78-NEXT: vld1.64 {d16, d17}, [r12] -; THUMB78-NEXT: vmov.i32 q9, #0x1 -; THUMB78-NEXT: vneg.s32 q8, q8 -; THUMB78-NEXT: vshl.u32 q8, q9, q8 -; THUMB78-NEXT: vmov d19, r2, r3 -; THUMB78-NEXT: vmov d18, r0, r1 -; THUMB78-NEXT: vtst.32 q8, q8, q9 +; THUMB78-NEXT: vld1.64 {d18, d19}, [r12] +; THUMB78-NEXT: vmov d16, r0, r1 +; THUMB78-NEXT: vmov.i32 q10, #0x1 +; THUMB78-NEXT: vshl.u32 q8, q8, q9 +; THUMB78-NEXT: vtst.32 q8, q8, q10 ; THUMB78-NEXT: vmvn q8, q8 ; THUMB78-NEXT: vmovn.i32 d16, q8 ; THUMB78-NEXT: vmov r0, r1, d16 @@ -730,20 +594,16 @@ ; ARM6: @ %bb.0: ; ARM6-NEXT: ldr r12, [sp, #4] ; ARM6-NEXT: mov r0, #1 -; ARM6-NEXT: and r0, r1, r0, lsr r12 +; ARM6-NEXT: bic r1, r0, r1, lsl r12 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r1, r0, #5 ; ARM6-NEXT: mov r0, #65280 ; ARM6-NEXT: orr r0, r0, #16711680 -; ARM6-NEXT: and r0, r2, r0, lsr r12 -; ARM6-NEXT: ldr r12, [sp, #12] +; ARM6-NEXT: and r0, r0, r2, lsl r12 ; ARM6-NEXT: clz r0, r0 ; ARM6-NEXT: lsr r2, r0, #5 -; ARM6-NEXT: mov r0, #-2147483648 -; ARM6-NEXT: and r0, r3, r0, lsr r12 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r3, r0, #5 +; ARM6-NEXT: ldr r0, [sp, #12] +; ARM6-NEXT: mvn r0, r3, lsl r0 +; ARM6-NEXT: lsr r3, r0, #31 ; ARM6-NEXT: mov r0, #1 ; ARM6-NEXT: bx lr ; @@ -772,27 +632,26 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, 
r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #20] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r0, [sp, #12] +; THUMB6-NEXT: lsls r1, r0 ; THUMB6-NEXT: movs r0, #1 -; THUMB6-NEXT: mov r5, r0 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #24] -; THUMB6-NEXT: ldr r5, .LCPI13_0 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r2 -; THUMB6-NEXT: rsbs r2, r5, #0 -; THUMB6-NEXT: adcs r2, r5 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 +; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: lsls r2, r4 +; THUMB6-NEXT: ldr r4, .LCPI13_0 +; THUMB6-NEXT: ands r4, r2 +; THUMB6-NEXT: rsbs r2, r4, #0 +; THUMB6-NEXT: adcs r2, r4 +; THUMB6-NEXT: ldr r4, [sp, #20] +; THUMB6-NEXT: lsls r3, r4 ; THUMB6-NEXT: lsls r4, r0, #31 -; THUMB6-NEXT: ldr r5, [sp, #28] -; THUMB6-NEXT: lsrs r4, r5 ; THUMB6-NEXT: ands r4, r3 ; THUMB6-NEXT: rsbs r3, r4, #0 ; THUMB6-NEXT: adcs r3, r4 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r4, pc} ; THUMB6-NEXT: .p2align 2 ; THUMB6-NEXT: @ %bb.1: ; THUMB6-NEXT: .LCPI13_0: @@ -833,29 +692,22 @@ ; ARM6-NEXT: ldr r2, [sp, #12] ; ARM6-NEXT: mov lr, #1 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: and r1, r1, lr, lsr r2 +; ARM6-NEXT: bic r1, lr, r1, lsl r2 ; ARM6-NEXT: ldr r2, [sp, #20] -; ARM6-NEXT: and r0, r0, lr, lsr r12 -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r2, r3, lr, lsr r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: lsr r3, r2, #5 +; ARM6-NEXT: bic r0, lr, r0, lsl r12 +; ARM6-NEXT: bic r3, lr, r3, lsl r2 ; ARM6-NEXT: mov r2, #1 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_nonsplat_undef0_eq: ; ARM78: @ %bb.0: +; ARM78-NEXT: vmov d17, r2, r3 ; ARM78-NEXT: mov r12, sp -; ARM78-NEXT: vld1.64 {d16, d17}, [r12] -; ARM78-NEXT: vmov.i32 q9, #0x1 -; ARM78-NEXT: vneg.s32 q8, q8 -; ARM78-NEXT: vshl.u32 q8, q9, q8 -; ARM78-NEXT: vmov d19, r2, r3 -; ARM78-NEXT: vmov d18, r0, r1 -; ARM78-NEXT: vtst.32 q8, q8, q9 +; ARM78-NEXT: vld1.64 {d18, d19}, [r12] +; ARM78-NEXT: vmov d16, r0, r1 +; ARM78-NEXT: vmov.i32 q10, #0x1 +; ARM78-NEXT: vshl.u32 q8, q8, q9 +; ARM78-NEXT: vtst.32 q8, q8, q10 ; ARM78-NEXT: vmvn q8, q8 ; ARM78-NEXT: vmovn.i32 d16, q8 ; ARM78-NEXT: vmov r0, r1, d16 @@ -863,38 +715,34 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_undef0_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r2, [sp, #8] +; THUMB6-NEXT: lsls r0, r2 ; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r0 -; THUMB6-NEXT: rsbs r0, r5, #0 -; THUMB6-NEXT: adcs r0, r5 +; THUMB6-NEXT: ands r0, r2 +; THUMB6-NEXT: rsbs r4, r0, #0 +; THUMB6-NEXT: adcs r0, r4 +; THUMB6-NEXT: ldr r4, [sp, #12] +; THUMB6-NEXT: lsls r1, r4 +; THUMB6-NEXT: ands r1, r2 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 ; THUMB6-NEXT: ldr r4, [sp, #20] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #28] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r3 -; THUMB6-NEXT: rsbs r3, r5, #0 -; THUMB6-NEXT: adcs r3, r5 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: lsls r3, r4 +; THUMB6-NEXT: ands r3, r2 +; THUMB6-NEXT: rsbs r4, r3, #0 +; THUMB6-NEXT: adcs r3, r4 +; THUMB6-NEXT: pop {r4, pc} ; ; 
THUMB78-LABEL: vec_4xi32_nonsplat_undef0_eq: ; THUMB78: @ %bb.0: +; THUMB78-NEXT: vmov d17, r2, r3 ; THUMB78-NEXT: mov r12, sp -; THUMB78-NEXT: vld1.64 {d16, d17}, [r12] -; THUMB78-NEXT: vmov.i32 q9, #0x1 -; THUMB78-NEXT: vneg.s32 q8, q8 -; THUMB78-NEXT: vshl.u32 q8, q9, q8 -; THUMB78-NEXT: vmov d19, r2, r3 -; THUMB78-NEXT: vmov d18, r0, r1 -; THUMB78-NEXT: vtst.32 q8, q8, q9 +; THUMB78-NEXT: vld1.64 {d18, d19}, [r12] +; THUMB78-NEXT: vmov d16, r0, r1 +; THUMB78-NEXT: vmov.i32 q10, #0x1 +; THUMB78-NEXT: vshl.u32 q8, q8, q9 +; THUMB78-NEXT: vtst.32 q8, q8, q10 ; THUMB78-NEXT: vmvn q8, q8 ; THUMB78-NEXT: vmovn.i32 d16, q8 ; THUMB78-NEXT: vmov r0, r1, d16 @@ -911,16 +759,10 @@ ; ARM6-NEXT: ldr r2, [sp, #12] ; ARM6-NEXT: mov lr, #1 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: and r1, r1, lr, lsr r2 +; ARM6-NEXT: bic r1, lr, r1, lsl r2 ; ARM6-NEXT: ldr r2, [sp, #20] -; ARM6-NEXT: and r0, r0, lr, lsr r12 -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r2, r3, lr, lsr r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: lsr r3, r2, #5 +; ARM6-NEXT: bic r0, lr, r0, lsl r12 +; ARM6-NEXT: bic r3, lr, r3, lsl r2 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_nonsplat_undef1_eq: @@ -940,26 +782,24 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_undef1_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r2, [sp, #8] +; THUMB6-NEXT: lsls r0, r2 ; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r0 -; THUMB6-NEXT: rsbs r0, r5, #0 -; THUMB6-NEXT: adcs r0, r5 +; THUMB6-NEXT: ands r0, r2 +; THUMB6-NEXT: rsbs r4, r0, #0 +; THUMB6-NEXT: adcs r0, r4 +; THUMB6-NEXT: ldr r4, [sp, #12] +; THUMB6-NEXT: lsls r1, r4 +; THUMB6-NEXT: ands r1, r2 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 ; THUMB6-NEXT: ldr r4, [sp, #20] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #28] -; THUMB6-NEXT: lsrs r2, r4 -; THUMB6-NEXT: ands r2, r3 -; THUMB6-NEXT: rsbs r3, r2, #0 +; THUMB6-NEXT: lsls r3, r4 +; THUMB6-NEXT: ands r3, r2 +; THUMB6-NEXT: rsbs r2, r3, #0 ; THUMB6-NEXT: adcs r3, r2 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r4, pc} ; ; THUMB78-LABEL: vec_4xi32_nonsplat_undef1_eq: ; THUMB78: @ %bb.0: @@ -987,16 +827,10 @@ ; ARM6-NEXT: ldr r2, [sp, #12] ; ARM6-NEXT: mov lr, #1 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: and r1, r1, lr, lsr r2 +; ARM6-NEXT: bic r1, lr, r1, lsl r2 ; ARM6-NEXT: ldr r2, [sp, #20] -; ARM6-NEXT: and r0, r0, lr, lsr r12 -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r2, r3, lr, lsr r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: lsr r3, r2, #5 +; ARM6-NEXT: bic r0, lr, r0, lsl r12 +; ARM6-NEXT: bic r3, lr, r3, lsl r2 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_nonsplat_undef2_eq: @@ -1016,26 +850,24 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_undef2_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r2, [sp, #8] +; THUMB6-NEXT: lsls r0, r2 ; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r0 -; THUMB6-NEXT: rsbs r0, r5, #0 -; THUMB6-NEXT: adcs r0, r5 +; THUMB6-NEXT: ands r0, r2 +; THUMB6-NEXT: rsbs r4, r0, #0 +; 
THUMB6-NEXT: adcs r0, r4 +; THUMB6-NEXT: ldr r4, [sp, #12] +; THUMB6-NEXT: lsls r1, r4 +; THUMB6-NEXT: ands r1, r2 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 ; THUMB6-NEXT: ldr r4, [sp, #20] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #28] -; THUMB6-NEXT: lsrs r2, r4 -; THUMB6-NEXT: ands r2, r3 -; THUMB6-NEXT: rsbs r3, r2, #0 +; THUMB6-NEXT: lsls r3, r4 +; THUMB6-NEXT: ands r3, r2 +; THUMB6-NEXT: rsbs r2, r3, #0 ; THUMB6-NEXT: adcs r3, r2 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r4, pc} ; ; THUMB78-LABEL: vec_4xi32_nonsplat_undef2_eq: ; THUMB78: @ %bb.0: @@ -1062,48 +894,21 @@ ;------------------------------------------------------------------------------; define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { -; ARM6-LABEL: scalar_i8_signbit_ne: -; ARM6: @ %bb.0: -; ARM6-NEXT: uxtb r1, r1 -; ARM6-NEXT: mov r2, #128 -; ARM6-NEXT: and r0, r0, r2, lsr r1 -; ARM6-NEXT: uxtb r0, r0 -; ARM6-NEXT: cmp r0, #0 -; ARM6-NEXT: movne r0, #1 -; ARM6-NEXT: bx lr -; -; ARM78-LABEL: scalar_i8_signbit_ne: -; ARM78: @ %bb.0: -; ARM78-NEXT: uxtb r1, r1 -; ARM78-NEXT: mov r2, #128 -; ARM78-NEXT: and r0, r0, r2, lsr r1 -; ARM78-NEXT: uxtb r0, r0 -; ARM78-NEXT: cmp r0, #0 -; ARM78-NEXT: movwne r0, #1 -; ARM78-NEXT: bx lr -; -; THUMB6-LABEL: scalar_i8_signbit_ne: -; THUMB6: @ %bb.0: -; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #128 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r0, r2 -; THUMB6-NEXT: subs r1, r0, #1 -; THUMB6-NEXT: sbcs r0, r1 -; THUMB6-NEXT: bx lr +; ARM-LABEL: scalar_i8_signbit_ne: +; ARM: @ %bb.0: +; ARM-NEXT: uxtb r1, r1 +; ARM-NEXT: lsl r0, r0, r1 +; ARM-NEXT: uxtb r0, r0 +; ARM-NEXT: lsr r0, r0, #7 +; ARM-NEXT: bx lr ; -; THUMB78-LABEL: scalar_i8_signbit_ne: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #128 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: cmp r0, #0 -; THUMB78-NEXT: it ne -; THUMB78-NEXT: movne r0, #1 -; THUMB78-NEXT: bx lr +; THUMB-LABEL: scalar_i8_signbit_ne: +; THUMB: @ %bb.0: +; THUMB-NEXT: uxtb r1, r1 +; THUMB-NEXT: lsls r0, r1 +; THUMB-NEXT: uxtb r0, r0 +; THUMB-NEXT: lsrs r0, r0, #7 +; THUMB-NEXT: bx lr %t0 = lshr i8 128, %y %t1 = and i8 %t0, %x %res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate Index: llvm/trunk/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ llvm/trunk/test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -21,35 +21,43 @@ ; ARM-LABEL: scalar_i8_signbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 -; ARM-NEXT: mvn r2, #127 -; ARM-NEXT: and r0, r0, r2, lsl r1 ; ARM-NEXT: uxtb r0, r0 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: lsr r0, r0, r1 +; ARM-NEXT: mov r1, #1 +; ARM-NEXT: uxtb r0, r0 +; ARM-NEXT: eor r0, r1, r0, lsr #7 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_signbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #127 -; THUMB6-NEXT: mvns r2, r2 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r1, r2 +; THUMB6-NEXT: uxtb r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #128 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx 
lr ; -; THUMB78-LABEL: scalar_i8_signbit_eq: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: mvn r2, #127 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i8_signbit_eq: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxtb r1, r1 +; THUMB7-NEXT: uxtb r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: movs r1, #1 +; THUMB7-NEXT: uxtb r0, r0 +; THUMB7-NEXT: eor.w r0, r1, r0, lsr #7 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i8_signbit_eq: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxtb r0, r0 +; THUMB8-NEXT: uxtb r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: movs r1, #1 +; THUMB8-NEXT: uxtb r0, r0 +; THUMB8-NEXT: eor.w r0, r1, r0, lsr #7 +; THUMB8-NEXT: bx lr %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 0 @@ -60,34 +68,39 @@ ; ARM-LABEL: scalar_i8_lowestbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 -; ARM-NEXT: mov r2, #1 -; ARM-NEXT: and r0, r0, r2, lsl r1 ; ARM-NEXT: uxtb r0, r0 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: mov r2, #1 +; ARM-NEXT: bic r0, r2, r0, lsr r1 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_lowestbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r1, r2 +; THUMB6-NEXT: uxtb r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; -; THUMB78-LABEL: scalar_i8_lowestbit_eq: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #1 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i8_lowestbit_eq: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxtb r1, r1 +; THUMB7-NEXT: uxtb r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: movs r1, #1 +; THUMB7-NEXT: bic.w r0, r1, r0 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i8_lowestbit_eq: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxtb r0, r0 +; THUMB8-NEXT: uxtb r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: movs r1, #1 +; THUMB8-NEXT: bic.w r0, r1, r0 +; THUMB8-NEXT: bx lr %t0 = shl i8 1, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 0 @@ -98,9 +111,9 @@ ; ARM-LABEL: scalar_i8_bitsinmiddle_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 -; ARM-NEXT: mov r2, #24 -; ARM-NEXT: and r0, r0, r2, lsl r1 ; ARM-NEXT: uxtb r0, r0 +; ARM-NEXT: mov r2, #24 +; ARM-NEXT: and r0, r2, r0, lsr r1 ; ARM-NEXT: clz r0, r0 ; ARM-NEXT: lsr r0, r0, #5 ; ARM-NEXT: bx lr @@ -108,24 +121,33 @@ ; THUMB6-LABEL: scalar_i8_bitsinmiddle_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #24 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r1, r2 +; THUMB6-NEXT: uxtb r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #24 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; -; THUMB78-LABEL: scalar_i8_bitsinmiddle_eq: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #24 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i8_bitsinmiddle_eq: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxtb r1, r1 +; 
THUMB7-NEXT: uxtb r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: and r0, r0, #24 +; THUMB7-NEXT: clz r0, r0 +; THUMB7-NEXT: lsrs r0, r0, #5 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i8_bitsinmiddle_eq: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxtb r0, r0 +; THUMB8-NEXT: uxtb r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: and r0, r0, #24 +; THUMB8-NEXT: clz r0, r0 +; THUMB8-NEXT: lsrs r0, r0, #5 +; THUMB8-NEXT: bx lr %t0 = shl i8 24, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 0 @@ -135,57 +157,47 @@ ; i16 scalar define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { -; ARM6-LABEL: scalar_i16_signbit_eq: -; ARM6: @ %bb.0: -; ARM6-NEXT: ldr r2, .LCPI3_0 -; ARM6-NEXT: uxth r1, r1 -; ARM6-NEXT: and r0, r0, r2, lsl r1 -; ARM6-NEXT: uxth r0, r0 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: bx lr -; ARM6-NEXT: .p2align 2 -; ARM6-NEXT: @ %bb.1: -; ARM6-NEXT: .LCPI3_0: -; ARM6-NEXT: .long 4294934528 @ 0xffff8000 -; -; ARM78-LABEL: scalar_i16_signbit_eq: -; ARM78: @ %bb.0: -; ARM78-NEXT: movw r2, #32768 -; ARM78-NEXT: uxth r1, r1 -; ARM78-NEXT: movt r2, #65535 -; ARM78-NEXT: and r0, r0, r2, lsl r1 -; ARM78-NEXT: uxth r0, r0 -; ARM78-NEXT: clz r0, r0 -; ARM78-NEXT: lsr r0, r0, #5 -; ARM78-NEXT: bx lr +; ARM-LABEL: scalar_i16_signbit_eq: +; ARM: @ %bb.0: +; ARM-NEXT: uxth r1, r1 +; ARM-NEXT: uxth r0, r0 +; ARM-NEXT: lsr r0, r0, r1 +; ARM-NEXT: mov r1, #1 +; ARM-NEXT: uxth r0, r0 +; ARM-NEXT: eor r0, r1, r0, lsr #15 +; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i16_signbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxth r1, r1 -; THUMB6-NEXT: ldr r2, .LCPI3_0 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxth r1, r2 +; THUMB6-NEXT: uxth r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: lsls r1, r1, #15 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr -; THUMB6-NEXT: .p2align 2 -; THUMB6-NEXT: @ %bb.1: -; THUMB6-NEXT: .LCPI3_0: -; THUMB6-NEXT: .long 4294934528 @ 0xffff8000 ; -; THUMB78-LABEL: scalar_i16_signbit_eq: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: movw r2, #32768 -; THUMB78-NEXT: uxth r1, r1 -; THUMB78-NEXT: movt r2, #65535 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxth r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i16_signbit_eq: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxth r1, r1 +; THUMB7-NEXT: uxth r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: movs r1, #1 +; THUMB7-NEXT: uxth r0, r0 +; THUMB7-NEXT: eor.w r0, r1, r0, lsr #15 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i16_signbit_eq: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxth r0, r0 +; THUMB8-NEXT: uxth r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: movs r1, #1 +; THUMB8-NEXT: uxth r0, r0 +; THUMB8-NEXT: eor.w r0, r1, r0, lsr #15 +; THUMB8-NEXT: bx lr %t0 = shl i16 32768, %y %t1 = and i16 %t0, %x %res = icmp eq i16 %t1, 0 @@ -196,34 +208,39 @@ ; ARM-LABEL: scalar_i16_lowestbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxth r1, r1 -; ARM-NEXT: mov r2, #1 -; ARM-NEXT: and r0, r0, r2, lsl r1 ; ARM-NEXT: uxth r0, r0 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: mov r2, #1 +; ARM-NEXT: bic r0, r2, r0, lsr r1 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i16_lowestbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxth r1, r1 -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxth r1, r2 +; THUMB6-NEXT: uxth r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; 
THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; -; THUMB78-LABEL: scalar_i16_lowestbit_eq: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxth r1, r1 -; THUMB78-NEXT: movs r2, #1 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxth r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i16_lowestbit_eq: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxth r1, r1 +; THUMB7-NEXT: uxth r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: movs r1, #1 +; THUMB7-NEXT: bic.w r0, r1, r0 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i16_lowestbit_eq: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxth r0, r0 +; THUMB8-NEXT: uxth r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: movs r1, #1 +; THUMB8-NEXT: bic.w r0, r1, r0 +; THUMB8-NEXT: bx lr %t0 = shl i16 1, %y %t1 = and i16 %t0, %x %res = icmp eq i16 %t1, 0 @@ -234,9 +251,9 @@ ; ARM-LABEL: scalar_i16_bitsinmiddle_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxth r1, r1 -; ARM-NEXT: mov r2, #4080 -; ARM-NEXT: and r0, r0, r2, lsl r1 ; ARM-NEXT: uxth r0, r0 +; ARM-NEXT: mov r2, #4080 +; ARM-NEXT: and r0, r2, r0, lsr r1 ; ARM-NEXT: clz r0, r0 ; ARM-NEXT: lsr r0, r0, #5 ; ARM-NEXT: bx lr @@ -244,25 +261,34 @@ ; THUMB6-LABEL: scalar_i16_bitsinmiddle_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxth r1, r1 -; THUMB6-NEXT: movs r2, #255 -; THUMB6-NEXT: lsls r2, r2, #4 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxth r1, r2 +; THUMB6-NEXT: uxth r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #255 +; THUMB6-NEXT: lsls r1, r1, #4 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; -; THUMB78-LABEL: scalar_i16_bitsinmiddle_eq: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxth r1, r1 -; THUMB78-NEXT: mov.w r2, #4080 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxth r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i16_bitsinmiddle_eq: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxth r1, r1 +; THUMB7-NEXT: uxth r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: and r0, r0, #4080 +; THUMB7-NEXT: clz r0, r0 +; THUMB7-NEXT: lsrs r0, r0, #5 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i16_bitsinmiddle_eq: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxth r0, r0 +; THUMB8-NEXT: uxth r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: and r0, r0, #4080 +; THUMB8-NEXT: clz r0, r0 +; THUMB8-NEXT: lsrs r0, r0, #5 +; THUMB8-NEXT: bx lr %t0 = shl i16 4080, %y %t1 = and i16 %t0, %x %res = icmp eq i16 %t1, 0 @@ -274,29 +300,25 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { ; ARM-LABEL: scalar_i32_signbit_eq: ; ARM: @ %bb.0: -; ARM-NEXT: mov r2, #-2147483648 -; ARM-NEXT: and r0, r0, r2, lsl r1 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: mvn r0, r0, lsr r1 +; ARM-NEXT: lsr r0, r0, #31 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i32_signbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsls r2, r2, #31 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: rsbs r0, r2, #0 -; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: lsls r1, r1, #31 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 +; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; ; THUMB78-LABEL: scalar_i32_signbit_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: mov.w r2, #-2147483648 -; 
THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: lsrs r0, r1 +; THUMB78-NEXT: mvns r0, r0 +; THUMB78-NEXT: lsrs r0, r0, #31 ; THUMB78-NEXT: bx lr %t0 = shl i32 2147483648, %y %t1 = and i32 %t0, %x @@ -308,27 +330,23 @@ ; ARM-LABEL: scalar_i32_lowestbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: mov r2, #1 -; ARM-NEXT: and r0, r0, r2, lsl r1 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: bic r0, r2, r0, lsr r1 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i32_lowestbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: rsbs r0, r2, #0 -; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 +; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; ; THUMB78-LABEL: scalar_i32_lowestbit_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: movs r2, #1 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: lsrs r0, r1 +; THUMB78-NEXT: movs r1, #1 +; THUMB78-NEXT: bic.w r0, r1, r0 ; THUMB78-NEXT: bx lr %t0 = shl i32 1, %y %t1 = and i32 %t0, %x @@ -341,7 +359,7 @@ ; ARM6: @ %bb.0: ; ARM6-NEXT: mov r2, #65280 ; ARM6-NEXT: orr r2, r2, #16711680 -; ARM6-NEXT: and r0, r0, r2, lsl r1 +; ARM6-NEXT: and r0, r2, r0, lsr r1 ; ARM6-NEXT: clz r0, r0 ; ARM6-NEXT: lsr r0, r0, #5 ; ARM6-NEXT: bx lr @@ -350,18 +368,18 @@ ; ARM78: @ %bb.0: ; ARM78-NEXT: movw r2, #65280 ; ARM78-NEXT: movt r2, #255 -; ARM78-NEXT: and r0, r0, r2, lsl r1 +; ARM78-NEXT: and r0, r2, r0, lsr r1 ; ARM78-NEXT: clz r0, r0 ; ARM78-NEXT: lsr r0, r0, #5 ; ARM78-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i32_bitsinmiddle_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: ldr r2, .LCPI8_0 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: rsbs r0, r2, #0 -; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: ldr r1, .LCPI8_0 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 +; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; THUMB6-NEXT: .p2align 2 ; THUMB6-NEXT: @ %bb.1: @@ -370,9 +388,9 @@ ; ; THUMB78-LABEL: scalar_i32_bitsinmiddle_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: movw r2, #65280 -; THUMB78-NEXT: movt r2, #255 -; THUMB78-NEXT: lsl.w r1, r2, r1 +; THUMB78-NEXT: lsrs r0, r1 +; THUMB78-NEXT: movw r1, #65280 +; THUMB78-NEXT: movt r1, #255 ; THUMB78-NEXT: ands r0, r1 ; THUMB78-NEXT: clz r0, r0 ; THUMB78-NEXT: lsrs r0, r0, #5 @@ -388,52 +406,41 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; ARM6-LABEL: scalar_i64_signbit_eq: ; ARM6: @ %bb.0: -; ARM6-NEXT: mov r0, #-2147483648 -; ARM6-NEXT: lsl r0, r0, r2 -; ARM6-NEXT: subs r2, r2, #32 +; ARM6-NEXT: lsr r0, r1, r2 +; ARM6-NEXT: subs r1, r2, #32 ; ARM6-NEXT: movpl r0, #0 -; ARM6-NEXT: and r0, r0, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r0, r0, #5 +; ARM6-NEXT: mvn r0, r0 +; ARM6-NEXT: lsr r0, r0, #31 ; ARM6-NEXT: bx lr ; ; ARM78-LABEL: scalar_i64_signbit_eq: ; ARM78: @ %bb.0: -; ARM78-NEXT: mov r0, #-2147483648 -; ARM78-NEXT: lsl r0, r0, r2 -; ARM78-NEXT: subs r2, r2, #32 +; ARM78-NEXT: lsr r0, r1, r2 +; ARM78-NEXT: subs r1, r2, #32 ; ARM78-NEXT: movwpl r0, #0 -; ARM78-NEXT: and r0, r0, r1 -; ARM78-NEXT: clz r0, r0 -; ARM78-NEXT: lsr r0, r0, #5 +; ARM78-NEXT: mvn r0, r0 +; ARM78-NEXT: lsr r0, r0, #31 ; ARM78-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i64_signbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; 
THUMB6-NEXT: mov r4, r1 -; THUMB6-NEXT: mov r5, r0 +; THUMB6-NEXT: push {r7, lr} +; THUMB6-NEXT: bl __lshrdi3 ; THUMB6-NEXT: movs r0, #1 -; THUMB6-NEXT: lsls r1, r0, #31 -; THUMB6-NEXT: movs r0, #0 -; THUMB6-NEXT: bl __ashldi3 -; THUMB6-NEXT: ands r1, r4 -; THUMB6-NEXT: ands r0, r5 -; THUMB6-NEXT: orrs r0, r1 -; THUMB6-NEXT: rsbs r1, r0, #0 -; THUMB6-NEXT: adcs r0, r1 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: lsls r2, r0, #31 +; THUMB6-NEXT: ands r2, r1 +; THUMB6-NEXT: rsbs r0, r2, #0 +; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: pop {r7, pc} ; ; THUMB78-LABEL: scalar_i64_signbit_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: mov.w r0, #-2147483648 -; THUMB78-NEXT: lsls r0, r2 -; THUMB78-NEXT: subs r2, #32 +; THUMB78-NEXT: lsr.w r0, r1, r2 +; THUMB78-NEXT: subs.w r1, r2, #32 ; THUMB78-NEXT: it pl ; THUMB78-NEXT: movpl r0, #0 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: mvns r0, r0 +; THUMB78-NEXT: lsrs r0, r0, #31 ; THUMB78-NEXT: bx lr %t0 = shl i64 9223372036854775808, %y %t1 = and i64 %t0, %x @@ -442,94 +449,53 @@ } define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { -; ARM6-LABEL: scalar_i64_lowestbit_eq: -; ARM6: @ %bb.0: -; ARM6-NEXT: push {r11, lr} -; ARM6-NEXT: mov r12, #1 -; ARM6-NEXT: subs lr, r2, #32 -; ARM6-NEXT: lsl r3, r12, r2 -; ARM6-NEXT: rsb r2, r2, #32 -; ARM6-NEXT: movpl r3, #0 -; ARM6-NEXT: and r0, r3, r0 -; ARM6-NEXT: lsr r2, r12, r2 -; ARM6-NEXT: lslpl r2, r12, lr -; ARM6-NEXT: and r1, r2, r1 -; ARM6-NEXT: orr r0, r0, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: pop {r11, pc} -; -; ARM78-LABEL: scalar_i64_lowestbit_eq: -; ARM78: @ %bb.0: -; ARM78-NEXT: push {r11, lr} -; ARM78-NEXT: mov r12, #1 -; ARM78-NEXT: subs lr, r2, #32 -; ARM78-NEXT: lsl r3, r12, r2 -; ARM78-NEXT: rsb r2, r2, #32 -; ARM78-NEXT: movwpl r3, #0 -; ARM78-NEXT: and r0, r3, r0 -; ARM78-NEXT: lsr r2, r12, r2 -; ARM78-NEXT: lslpl r2, r12, lr -; ARM78-NEXT: and r1, r2, r1 -; ARM78-NEXT: orr r0, r0, r1 -; ARM78-NEXT: clz r0, r0 -; ARM78-NEXT: lsr r0, r0, #5 -; ARM78-NEXT: pop {r11, pc} +; ARM-LABEL: scalar_i64_lowestbit_eq: +; ARM: @ %bb.0: +; ARM-NEXT: rsb r3, r2, #32 +; ARM-NEXT: lsr r0, r0, r2 +; ARM-NEXT: subs r2, r2, #32 +; ARM-NEXT: orr r0, r0, r1, lsl r3 +; ARM-NEXT: lsrpl r0, r1, r2 +; ARM-NEXT: mov r1, #1 +; ARM-NEXT: bic r0, r1, r0 +; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i64_lowestbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: mov r4, r1 -; THUMB6-NEXT: mov r5, r0 -; THUMB6-NEXT: movs r0, #1 -; THUMB6-NEXT: movs r1, #0 -; THUMB6-NEXT: bl __ashldi3 -; THUMB6-NEXT: ands r1, r4 -; THUMB6-NEXT: ands r0, r5 -; THUMB6-NEXT: orrs r0, r1 -; THUMB6-NEXT: rsbs r1, r0, #0 +; THUMB6-NEXT: push {r7, lr} +; THUMB6-NEXT: bl __lshrdi3 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r7, pc} ; ; THUMB7-LABEL: scalar_i64_lowestbit_eq: ; THUMB7: @ %bb.0: -; THUMB7-NEXT: push {r7, lr} ; THUMB7-NEXT: rsb.w r3, r2, #32 -; THUMB7-NEXT: mov.w r12, #1 -; THUMB7-NEXT: subs.w lr, r2, #32 -; THUMB7-NEXT: lsl.w r2, r12, r2 -; THUMB7-NEXT: lsr.w r3, r12, r3 -; THUMB7-NEXT: it pl -; THUMB7-NEXT: lslpl.w r3, r12, lr +; THUMB7-NEXT: lsrs r0, r2 +; THUMB7-NEXT: subs r2, #32 +; THUMB7-NEXT: lsl.w r3, r1, r3 +; THUMB7-NEXT: orr.w r0, r0, r3 ; THUMB7-NEXT: it pl -; THUMB7-NEXT: movpl r2, #0 -; THUMB7-NEXT: ands r1, r3 -; THUMB7-NEXT: ands r0, r2 -; THUMB7-NEXT: orrs r0, r1 
-; THUMB7-NEXT: clz r0, r0 -; THUMB7-NEXT: lsrs r0, r0, #5 -; THUMB7-NEXT: pop {r7, pc} +; THUMB7-NEXT: lsrpl.w r0, r1, r2 +; THUMB7-NEXT: movs r1, #1 +; THUMB7-NEXT: bic.w r0, r1, r0 +; THUMB7-NEXT: bx lr ; ; THUMB8-LABEL: scalar_i64_lowestbit_eq: ; THUMB8: @ %bb.0: -; THUMB8-NEXT: .save {r7, lr} -; THUMB8-NEXT: push {r7, lr} -; THUMB8-NEXT: subs.w r3, r2, #32 -; THUMB8-NEXT: mov.w r12, #1 -; THUMB8-NEXT: lsl.w lr, r12, r3 ; THUMB8-NEXT: rsb.w r3, r2, #32 -; THUMB8-NEXT: lsl.w r2, r12, r2 -; THUMB8-NEXT: lsr.w r3, r12, r3 -; THUMB8-NEXT: it pl -; THUMB8-NEXT: movpl r3, lr -; THUMB8-NEXT: it pl -; THUMB8-NEXT: movpl r2, #0 -; THUMB8-NEXT: ands r1, r3 -; THUMB8-NEXT: ands r0, r2 -; THUMB8-NEXT: orrs r0, r1 -; THUMB8-NEXT: clz r0, r0 -; THUMB8-NEXT: lsrs r0, r0, #5 -; THUMB8-NEXT: pop {r7, pc} +; THUMB8-NEXT: lsrs r0, r2 +; THUMB8-NEXT: lsl.w r3, r1, r3 +; THUMB8-NEXT: orrs r0, r3 +; THUMB8-NEXT: subs r2, #32 +; THUMB8-NEXT: lsr.w r1, r1, r2 +; THUMB8-NEXT: it mi +; THUMB8-NEXT: movmi r1, r0 +; THUMB8-NEXT: movs r0, #1 +; THUMB8-NEXT: bics r0, r1 +; THUMB8-NEXT: bx lr %t0 = shl i64 1, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -539,115 +505,82 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; ARM6-LABEL: scalar_i64_bitsinmiddle_eq: ; ARM6: @ %bb.0: -; ARM6-NEXT: push {r4, lr} -; ARM6-NEXT: mov r12, #16711680 -; ARM6-NEXT: subs lr, r2, #32 -; ARM6-NEXT: orr r12, r12, #-16777216 -; ARM6-NEXT: mov r4, #255 -; ARM6-NEXT: orr r4, r4, #65280 -; ARM6-NEXT: lsl r3, r12, r2 -; ARM6-NEXT: movpl r3, #0 -; ARM6-NEXT: and r0, r3, r0 ; ARM6-NEXT: rsb r3, r2, #32 -; ARM6-NEXT: cmp lr, #0 -; ARM6-NEXT: lsr r3, r12, r3 -; ARM6-NEXT: orr r2, r3, r4, lsl r2 -; ARM6-NEXT: lslpl r2, r12, lr -; ARM6-NEXT: and r1, r2, r1 -; ARM6-NEXT: orr r0, r0, r1 +; ARM6-NEXT: lsr r0, r0, r2 +; ARM6-NEXT: orr r0, r0, r1, lsl r3 +; ARM6-NEXT: subs r3, r2, #32 +; ARM6-NEXT: lsrpl r0, r1, r3 +; ARM6-NEXT: lsr r1, r1, r2 +; ARM6-NEXT: movpl r1, #0 +; ARM6-NEXT: pkhbt r0, r1, r0 ; ARM6-NEXT: clz r0, r0 ; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: pop {r4, pc} +; ARM6-NEXT: bx lr ; ; ARM78-LABEL: scalar_i64_bitsinmiddle_eq: ; ARM78: @ %bb.0: -; ARM78-NEXT: push {r4, lr} -; ARM78-NEXT: movw r12, #0 -; ARM78-NEXT: subs lr, r2, #32 -; ARM78-NEXT: movt r12, #65535 -; ARM78-NEXT: movw r4, #65535 -; ARM78-NEXT: lsl r3, r12, r2 -; ARM78-NEXT: movwpl r3, #0 -; ARM78-NEXT: and r0, r3, r0 ; ARM78-NEXT: rsb r3, r2, #32 -; ARM78-NEXT: cmp lr, #0 -; ARM78-NEXT: lsr r3, r12, r3 -; ARM78-NEXT: orr r2, r3, r4, lsl r2 -; ARM78-NEXT: lslpl r2, r12, lr -; ARM78-NEXT: and r1, r2, r1 -; ARM78-NEXT: orr r0, r0, r1 +; ARM78-NEXT: lsr r0, r0, r2 +; ARM78-NEXT: orr r0, r0, r1, lsl r3 +; ARM78-NEXT: subs r3, r2, #32 +; ARM78-NEXT: lsrpl r0, r1, r3 +; ARM78-NEXT: lsr r1, r1, r2 +; ARM78-NEXT: movwpl r1, #0 +; ARM78-NEXT: pkhbt r0, r1, r0 ; ARM78-NEXT: clz r0, r0 ; ARM78-NEXT: lsr r0, r0, #5 -; ARM78-NEXT: pop {r4, pc} +; ARM78-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i64_bitsinmiddle_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: mov r4, r1 -; THUMB6-NEXT: mov r5, r0 -; THUMB6-NEXT: ldr r0, .LCPI11_0 -; THUMB6-NEXT: ldr r1, .LCPI11_1 -; THUMB6-NEXT: bl __ashldi3 -; THUMB6-NEXT: ands r1, r4 -; THUMB6-NEXT: ands r0, r5 -; THUMB6-NEXT: orrs r0, r1 -; THUMB6-NEXT: rsbs r1, r0, #0 +; THUMB6-NEXT: push {r7, lr} +; THUMB6-NEXT: bl __lshrdi3 +; THUMB6-NEXT: ldr r2, .LCPI11_0 +; THUMB6-NEXT: ands r2, r0 +; THUMB6-NEXT: uxth r0, r1 +; THUMB6-NEXT: adds r1, r2, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs 
r0, r1 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r7, pc} ; THUMB6-NEXT: .p2align 2 ; THUMB6-NEXT: @ %bb.1: ; THUMB6-NEXT: .LCPI11_0: ; THUMB6-NEXT: .long 4294901760 @ 0xffff0000 -; THUMB6-NEXT: .LCPI11_1: -; THUMB6-NEXT: .long 65535 @ 0xffff ; ; THUMB7-LABEL: scalar_i64_bitsinmiddle_eq: ; THUMB7: @ %bb.0: -; THUMB7-NEXT: push {r7, lr} -; THUMB7-NEXT: movw r3, #65535 -; THUMB7-NEXT: movw lr, #0 -; THUMB7-NEXT: lsl.w r12, r3, r2 ; THUMB7-NEXT: rsb.w r3, r2, #32 -; THUMB7-NEXT: movt lr, #65535 -; THUMB7-NEXT: lsr.w r3, lr, r3 -; THUMB7-NEXT: orr.w r12, r12, r3 +; THUMB7-NEXT: lsrs r0, r2 +; THUMB7-NEXT: lsl.w r3, r1, r3 +; THUMB7-NEXT: orrs r0, r3 ; THUMB7-NEXT: subs.w r3, r2, #32 -; THUMB7-NEXT: lsl.w r2, lr, r2 ; THUMB7-NEXT: it pl -; THUMB7-NEXT: lslpl.w r12, lr, r3 +; THUMB7-NEXT: lsrpl.w r0, r1, r3 +; THUMB7-NEXT: lsr.w r1, r1, r2 ; THUMB7-NEXT: it pl -; THUMB7-NEXT: movpl r2, #0 -; THUMB7-NEXT: and.w r1, r1, r12 -; THUMB7-NEXT: ands r0, r2 -; THUMB7-NEXT: orrs r0, r1 +; THUMB7-NEXT: movpl r1, #0 +; THUMB7-NEXT: pkhbt r0, r1, r0 ; THUMB7-NEXT: clz r0, r0 ; THUMB7-NEXT: lsrs r0, r0, #5 -; THUMB7-NEXT: pop {r7, pc} +; THUMB7-NEXT: bx lr ; ; THUMB8-LABEL: scalar_i64_bitsinmiddle_eq: ; THUMB8: @ %bb.0: -; THUMB8-NEXT: .save {r7, lr} -; THUMB8-NEXT: push {r7, lr} -; THUMB8-NEXT: movw r3, #65535 -; THUMB8-NEXT: movw lr, #0 -; THUMB8-NEXT: lsl.w r12, r3, r2 ; THUMB8-NEXT: rsb.w r3, r2, #32 -; THUMB8-NEXT: movt lr, #65535 -; THUMB8-NEXT: lsr.w r3, lr, r3 -; THUMB8-NEXT: orr.w r12, r12, r3 +; THUMB8-NEXT: lsrs r0, r2 +; THUMB8-NEXT: lsl.w r3, r1, r3 +; THUMB8-NEXT: orrs r0, r3 ; THUMB8-NEXT: subs.w r3, r2, #32 -; THUMB8-NEXT: lsl.w r2, lr, r2 -; THUMB8-NEXT: lsl.w r3, lr, r3 +; THUMB8-NEXT: lsr.w r3, r1, r3 ; THUMB8-NEXT: it mi -; THUMB8-NEXT: movmi r3, r12 +; THUMB8-NEXT: movmi r3, r0 +; THUMB8-NEXT: lsr.w r0, r1, r2 ; THUMB8-NEXT: it pl -; THUMB8-NEXT: movpl r2, #0 -; THUMB8-NEXT: ands r1, r3 -; THUMB8-NEXT: ands r0, r2 -; THUMB8-NEXT: orrs r0, r1 +; THUMB8-NEXT: movpl r0, #0 +; THUMB8-NEXT: pkhbt r0, r0, r3 ; THUMB8-NEXT: clz r0, r0 ; THUMB8-NEXT: lsrs r0, r0, #5 -; THUMB8-NEXT: pop {r7, pc} +; THUMB8-NEXT: bx lr %t0 = shl i64 281474976645120, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -664,32 +597,25 @@ ; ARM6-NEXT: push {r11, lr} ; ARM6-NEXT: ldr r12, [sp, #8] ; ARM6-NEXT: mov lr, #1 -; ARM6-NEXT: and r0, r0, lr, lsl r12 +; ARM6-NEXT: bic r0, lr, r0, lsr r12 ; ARM6-NEXT: ldr r12, [sp, #12] -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r1, r1, lr, lsl r12 +; ARM6-NEXT: bic r1, lr, r1, lsr r12 ; ARM6-NEXT: ldr r12, [sp, #16] -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: and r2, r2, lr, lsl r12 +; ARM6-NEXT: bic r2, lr, r2, lsr r12 ; ARM6-NEXT: ldr r12, [sp, #20] -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: and r3, r3, lr, lsl r12 -; ARM6-NEXT: lsr r2, r2, #5 -; ARM6-NEXT: clz r3, r3 -; ARM6-NEXT: lsr r3, r3, #5 +; ARM6-NEXT: bic r3, lr, r3, lsr r12 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_splat_eq: ; ARM78: @ %bb.0: -; ARM78-NEXT: vmov.i32 q8, #0x1 ; ARM78-NEXT: mov r12, sp -; ARM78-NEXT: vld1.64 {d18, d19}, [r12] -; ARM78-NEXT: vshl.u32 q8, q8, q9 +; ARM78-NEXT: vld1.64 {d16, d17}, [r12] ; ARM78-NEXT: vmov d19, r2, r3 +; ARM78-NEXT: vneg.s32 q8, q8 ; ARM78-NEXT: vmov d18, r0, r1 -; ARM78-NEXT: vtst.32 q8, q8, q9 +; ARM78-NEXT: vmov.i32 q10, #0x1 +; ARM78-NEXT: vshl.u32 q8, q9, q8 +; ARM78-NEXT: vtst.32 q8, q8, q10 ; ARM78-NEXT: vmvn q8, q8 ; ARM78-NEXT: vmovn.i32 d16, q8 ; ARM78-NEXT: vmov r0, r1, d16 @@ -697,42 
+623,40 @@ ; ; THUMB6-LABEL: vec_4xi32_splat_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r6, lr} -; THUMB6-NEXT: ldr r5, [sp, #16] +; THUMB6-NEXT: push {r4, r5, r7, lr} +; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: lsrs r0, r4 ; THUMB6-NEXT: movs r4, #1 -; THUMB6-NEXT: mov r6, r4 -; THUMB6-NEXT: lsls r6, r5 -; THUMB6-NEXT: ands r6, r0 -; THUMB6-NEXT: rsbs r0, r6, #0 -; THUMB6-NEXT: adcs r0, r6 +; THUMB6-NEXT: ands r0, r4 +; THUMB6-NEXT: rsbs r5, r0, #0 +; THUMB6-NEXT: adcs r0, r5 ; THUMB6-NEXT: ldr r5, [sp, #20] -; THUMB6-NEXT: mov r6, r4 -; THUMB6-NEXT: lsls r6, r5 -; THUMB6-NEXT: ands r6, r1 -; THUMB6-NEXT: rsbs r1, r6, #0 -; THUMB6-NEXT: adcs r1, r6 +; THUMB6-NEXT: lsrs r1, r5 +; THUMB6-NEXT: ands r1, r4 +; THUMB6-NEXT: rsbs r5, r1, #0 +; THUMB6-NEXT: adcs r1, r5 ; THUMB6-NEXT: ldr r5, [sp, #24] -; THUMB6-NEXT: mov r6, r4 -; THUMB6-NEXT: lsls r6, r5 -; THUMB6-NEXT: ands r6, r2 -; THUMB6-NEXT: rsbs r2, r6, #0 -; THUMB6-NEXT: adcs r2, r6 +; THUMB6-NEXT: lsrs r2, r5 +; THUMB6-NEXT: ands r2, r4 +; THUMB6-NEXT: rsbs r5, r2, #0 +; THUMB6-NEXT: adcs r2, r5 ; THUMB6-NEXT: ldr r5, [sp, #28] -; THUMB6-NEXT: lsls r4, r5 -; THUMB6-NEXT: ands r4, r3 -; THUMB6-NEXT: rsbs r3, r4, #0 +; THUMB6-NEXT: lsrs r3, r5 +; THUMB6-NEXT: ands r3, r4 +; THUMB6-NEXT: rsbs r4, r3, #0 ; THUMB6-NEXT: adcs r3, r4 -; THUMB6-NEXT: pop {r4, r5, r6, pc} +; THUMB6-NEXT: pop {r4, r5, r7, pc} ; ; THUMB78-LABEL: vec_4xi32_splat_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: vmov.i32 q8, #0x1 ; THUMB78-NEXT: mov r12, sp -; THUMB78-NEXT: vld1.64 {d18, d19}, [r12] -; THUMB78-NEXT: vshl.u32 q8, q8, q9 +; THUMB78-NEXT: vld1.64 {d16, d17}, [r12] ; THUMB78-NEXT: vmov d19, r2, r3 +; THUMB78-NEXT: vneg.s32 q8, q8 ; THUMB78-NEXT: vmov d18, r0, r1 -; THUMB78-NEXT: vtst.32 q8, q8, q9 +; THUMB78-NEXT: vmov.i32 q10, #0x1 +; THUMB78-NEXT: vshl.u32 q8, q9, q8 +; THUMB78-NEXT: vtst.32 q8, q8, q10 ; THUMB78-NEXT: vmvn q8, q8 ; THUMB78-NEXT: vmovn.i32 d16, q8 ; THUMB78-NEXT: vmov r0, r1, d16 @@ -748,20 +672,16 @@ ; ARM6: @ %bb.0: ; ARM6-NEXT: ldr r12, [sp, #4] ; ARM6-NEXT: mov r0, #1 -; ARM6-NEXT: and r0, r1, r0, lsl r12 +; ARM6-NEXT: bic r1, r0, r1, lsr r12 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r1, r0, #5 ; ARM6-NEXT: mov r0, #65280 ; ARM6-NEXT: orr r0, r0, #16711680 -; ARM6-NEXT: and r0, r2, r0, lsl r12 -; ARM6-NEXT: ldr r12, [sp, #12] +; ARM6-NEXT: and r0, r0, r2, lsr r12 ; ARM6-NEXT: clz r0, r0 ; ARM6-NEXT: lsr r2, r0, #5 -; ARM6-NEXT: mov r0, #-2147483648 -; ARM6-NEXT: and r0, r3, r0, lsl r12 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r3, r0, #5 +; ARM6-NEXT: ldr r0, [sp, #12] +; ARM6-NEXT: mvn r0, r3, lsr r0 +; ARM6-NEXT: lsr r3, r0, #31 ; ARM6-NEXT: mov r0, #1 ; ARM6-NEXT: bx lr ; @@ -789,27 +709,26 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #20] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r0, [sp, #12] +; THUMB6-NEXT: lsrs r1, r0 ; THUMB6-NEXT: movs r0, #1 -; THUMB6-NEXT: mov r5, r0 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #24] -; THUMB6-NEXT: ldr r5, .LCPI13_0 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r2 -; THUMB6-NEXT: rsbs r2, r5, #0 -; THUMB6-NEXT: adcs r2, r5 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 +; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: lsrs r2, r4 +; THUMB6-NEXT: ldr r4, .LCPI13_0 +; THUMB6-NEXT: ands r4, r2 +; THUMB6-NEXT: 
rsbs r2, r4, #0 +; THUMB6-NEXT: adcs r2, r4 +; THUMB6-NEXT: ldr r4, [sp, #20] +; THUMB6-NEXT: lsrs r3, r4 ; THUMB6-NEXT: lsls r4, r0, #31 -; THUMB6-NEXT: ldr r5, [sp, #28] -; THUMB6-NEXT: lsls r4, r5 ; THUMB6-NEXT: ands r4, r3 ; THUMB6-NEXT: rsbs r3, r4, #0 ; THUMB6-NEXT: adcs r3, r4 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r4, pc} ; THUMB6-NEXT: .p2align 2 ; THUMB6-NEXT: @ %bb.1: ; THUMB6-NEXT: .LCPI13_0: @@ -849,28 +768,23 @@ ; ARM6-NEXT: ldr r2, [sp, #12] ; ARM6-NEXT: mov lr, #1 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: and r1, r1, lr, lsl r2 +; ARM6-NEXT: bic r1, lr, r1, lsr r2 ; ARM6-NEXT: ldr r2, [sp, #20] -; ARM6-NEXT: and r0, r0, lr, lsl r12 -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r2, r3, lr, lsl r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: lsr r3, r2, #5 +; ARM6-NEXT: bic r0, lr, r0, lsr r12 +; ARM6-NEXT: bic r3, lr, r3, lsr r2 ; ARM6-NEXT: mov r2, #1 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_nonsplat_undef0_eq: ; ARM78: @ %bb.0: -; ARM78-NEXT: vmov.i32 q8, #0x1 ; ARM78-NEXT: mov r12, sp -; ARM78-NEXT: vld1.64 {d18, d19}, [r12] -; ARM78-NEXT: vshl.u32 q8, q8, q9 +; ARM78-NEXT: vld1.64 {d16, d17}, [r12] ; ARM78-NEXT: vmov d19, r2, r3 +; ARM78-NEXT: vneg.s32 q8, q8 ; ARM78-NEXT: vmov d18, r0, r1 -; ARM78-NEXT: vtst.32 q8, q8, q9 +; ARM78-NEXT: vmov.i32 q10, #0x1 +; ARM78-NEXT: vshl.u32 q8, q9, q8 +; ARM78-NEXT: vtst.32 q8, q8, q10 ; ARM78-NEXT: vmvn q8, q8 ; ARM78-NEXT: vmovn.i32 d16, q8 ; ARM78-NEXT: vmov r0, r1, d16 @@ -878,37 +792,35 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_undef0_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r2, [sp, #8] +; THUMB6-NEXT: lsrs r0, r2 ; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r0 -; THUMB6-NEXT: rsbs r0, r5, #0 -; THUMB6-NEXT: adcs r0, r5 +; THUMB6-NEXT: ands r0, r2 +; THUMB6-NEXT: rsbs r4, r0, #0 +; THUMB6-NEXT: adcs r0, r4 +; THUMB6-NEXT: ldr r4, [sp, #12] +; THUMB6-NEXT: lsrs r1, r4 +; THUMB6-NEXT: ands r1, r2 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 ; THUMB6-NEXT: ldr r4, [sp, #20] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #28] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r3 -; THUMB6-NEXT: rsbs r3, r5, #0 -; THUMB6-NEXT: adcs r3, r5 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: lsrs r3, r4 +; THUMB6-NEXT: ands r3, r2 +; THUMB6-NEXT: rsbs r4, r3, #0 +; THUMB6-NEXT: adcs r3, r4 +; THUMB6-NEXT: pop {r4, pc} ; ; THUMB78-LABEL: vec_4xi32_nonsplat_undef0_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: vmov.i32 q8, #0x1 ; THUMB78-NEXT: mov r12, sp -; THUMB78-NEXT: vld1.64 {d18, d19}, [r12] -; THUMB78-NEXT: vshl.u32 q8, q8, q9 +; THUMB78-NEXT: vld1.64 {d16, d17}, [r12] ; THUMB78-NEXT: vmov d19, r2, r3 +; THUMB78-NEXT: vneg.s32 q8, q8 ; THUMB78-NEXT: vmov d18, r0, r1 -; THUMB78-NEXT: vtst.32 q8, q8, q9 +; THUMB78-NEXT: vmov.i32 q10, #0x1 +; THUMB78-NEXT: vshl.u32 q8, q9, q8 +; THUMB78-NEXT: vtst.32 q8, q8, q10 ; THUMB78-NEXT: vmvn q8, q8 ; THUMB78-NEXT: vmovn.i32 d16, q8 ; THUMB78-NEXT: vmov r0, r1, d16 @@ -925,16 +837,10 @@ ; ARM6-NEXT: ldr r2, [sp, #12] ; ARM6-NEXT: mov lr, #1 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: and r1, r1, lr, lsl r2 +; ARM6-NEXT: bic r1, lr, r1, lsr r2 ; ARM6-NEXT: ldr 
r2, [sp, #20] -; ARM6-NEXT: and r0, r0, lr, lsl r12 -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r2, r3, lr, lsl r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: lsr r3, r2, #5 +; ARM6-NEXT: bic r0, lr, r0, lsr r12 +; ARM6-NEXT: bic r3, lr, r3, lsr r2 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_nonsplat_undef1_eq: @@ -953,26 +859,24 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_undef1_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r2, [sp, #8] +; THUMB6-NEXT: lsrs r0, r2 ; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r0 -; THUMB6-NEXT: rsbs r0, r5, #0 -; THUMB6-NEXT: adcs r0, r5 +; THUMB6-NEXT: ands r0, r2 +; THUMB6-NEXT: rsbs r4, r0, #0 +; THUMB6-NEXT: adcs r0, r4 +; THUMB6-NEXT: ldr r4, [sp, #12] +; THUMB6-NEXT: lsrs r1, r4 +; THUMB6-NEXT: ands r1, r2 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 ; THUMB6-NEXT: ldr r4, [sp, #20] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #28] -; THUMB6-NEXT: lsls r2, r4 -; THUMB6-NEXT: ands r2, r3 -; THUMB6-NEXT: rsbs r3, r2, #0 +; THUMB6-NEXT: lsrs r3, r4 +; THUMB6-NEXT: ands r3, r2 +; THUMB6-NEXT: rsbs r2, r3, #0 ; THUMB6-NEXT: adcs r3, r2 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r4, pc} ; ; THUMB78-LABEL: vec_4xi32_nonsplat_undef1_eq: ; THUMB78: @ %bb.0: @@ -999,16 +903,10 @@ ; ARM6-NEXT: ldr r2, [sp, #12] ; ARM6-NEXT: mov lr, #1 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: and r1, r1, lr, lsl r2 +; ARM6-NEXT: bic r1, lr, r1, lsr r2 ; ARM6-NEXT: ldr r2, [sp, #20] -; ARM6-NEXT: and r0, r0, lr, lsl r12 -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r2, r3, lr, lsl r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: lsr r3, r2, #5 +; ARM6-NEXT: bic r0, lr, r0, lsr r12 +; ARM6-NEXT: bic r3, lr, r3, lsr r2 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_nonsplat_undef2_eq: @@ -1027,26 +925,24 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_undef2_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r2, [sp, #8] +; THUMB6-NEXT: lsrs r0, r2 ; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r0 -; THUMB6-NEXT: rsbs r0, r5, #0 -; THUMB6-NEXT: adcs r0, r5 +; THUMB6-NEXT: ands r0, r2 +; THUMB6-NEXT: rsbs r4, r0, #0 +; THUMB6-NEXT: adcs r0, r4 +; THUMB6-NEXT: ldr r4, [sp, #12] +; THUMB6-NEXT: lsrs r1, r4 +; THUMB6-NEXT: ands r1, r2 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 ; THUMB6-NEXT: ldr r4, [sp, #20] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #28] -; THUMB6-NEXT: lsls r2, r4 -; THUMB6-NEXT: ands r2, r3 -; THUMB6-NEXT: rsbs r3, r2, #0 +; THUMB6-NEXT: lsrs r3, r4 +; THUMB6-NEXT: ands r3, r2 +; THUMB6-NEXT: rsbs r2, r3, #0 ; THUMB6-NEXT: adcs r3, r2 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r4, pc} ; ; THUMB78-LABEL: vec_4xi32_nonsplat_undef2_eq: ; THUMB78: @ %bb.0: @@ -1072,49 +968,41 @@ ;------------------------------------------------------------------------------; define i1 @scalar_i8_signbit_ne(i8 
%x, i8 %y) nounwind { -; ARM6-LABEL: scalar_i8_signbit_ne: -; ARM6: @ %bb.0: -; ARM6-NEXT: uxtb r1, r1 -; ARM6-NEXT: mvn r2, #127 -; ARM6-NEXT: and r0, r0, r2, lsl r1 -; ARM6-NEXT: uxtb r0, r0 -; ARM6-NEXT: cmp r0, #0 -; ARM6-NEXT: movne r0, #1 -; ARM6-NEXT: bx lr -; -; ARM78-LABEL: scalar_i8_signbit_ne: -; ARM78: @ %bb.0: -; ARM78-NEXT: uxtb r1, r1 -; ARM78-NEXT: mvn r2, #127 -; ARM78-NEXT: and r0, r0, r2, lsl r1 -; ARM78-NEXT: uxtb r0, r0 -; ARM78-NEXT: cmp r0, #0 -; ARM78-NEXT: movwne r0, #1 -; ARM78-NEXT: bx lr +; ARM-LABEL: scalar_i8_signbit_ne: +; ARM: @ %bb.0: +; ARM-NEXT: uxtb r1, r1 +; ARM-NEXT: uxtb r0, r0 +; ARM-NEXT: lsr r0, r0, r1 +; ARM-NEXT: uxtb r0, r0 +; ARM-NEXT: lsr r0, r0, #7 +; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_signbit_ne: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #127 -; THUMB6-NEXT: mvns r2, r2 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r0, r2 -; THUMB6-NEXT: subs r1, r0, #1 -; THUMB6-NEXT: sbcs r0, r1 +; THUMB6-NEXT: uxtb r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: uxtb r0, r0 +; THUMB6-NEXT: lsrs r0, r0, #7 ; THUMB6-NEXT: bx lr ; -; THUMB78-LABEL: scalar_i8_signbit_ne: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: mvn r2, #127 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: cmp r0, #0 -; THUMB78-NEXT: it ne -; THUMB78-NEXT: movne r0, #1 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i8_signbit_ne: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxtb r1, r1 +; THUMB7-NEXT: uxtb r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: uxtb r0, r0 +; THUMB7-NEXT: lsrs r0, r0, #7 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i8_signbit_ne: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxtb r0, r0 +; THUMB8-NEXT: uxtb r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: uxtb r0, r0 +; THUMB8-NEXT: lsrs r0, r0, #7 +; THUMB8-NEXT: bx lr %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate Index: llvm/trunk/test/CodeGen/PowerPC/shift-cmp.ll =================================================================== --- llvm/trunk/test/CodeGen/PowerPC/shift-cmp.ll +++ llvm/trunk/test/CodeGen/PowerPC/shift-cmp.ll @@ -6,8 +6,7 @@ define i1 @and_cmp_variable_power_of_two(i32 %x, i32 %y) { ; CHECK-LABEL: and_cmp_variable_power_of_two: ; CHECK: # %bb.0: -; CHECK-NEXT: subfic 4, 4, 32 -; CHECK-NEXT: rlwnm 3, 3, 4, 31, 31 +; CHECK-NEXT: srw 3, 3, 4 ; CHECK-NEXT: blr %shl = shl i32 1, %y %and = and i32 %x, %shl @@ -18,8 +17,7 @@ define i1 @and_cmp_variable_power_of_two_64(i64 %x, i64 %y) { ; CHECK-LABEL: and_cmp_variable_power_of_two_64: ; CHECK: # %bb.0: -; CHECK-NEXT: subfic 4, 4, 64 -; CHECK-NEXT: rldcl 3, 3, 4, 63 +; CHECK-NEXT: srd 3, 3, 4 ; CHECK-NEXT: blr %shl = shl i64 1, %y %and = and i64 %x, %shl @@ -30,9 +28,8 @@ define i1 @and_ncmp_variable_power_of_two(i32 %x, i32 %y) { ; CHECK-LABEL: and_ncmp_variable_power_of_two: ; CHECK: # %bb.0: -; CHECK-NEXT: subfic 4, 4, 32 -; CHECK-NEXT: nor 3, 3, 3 -; CHECK-NEXT: rlwnm 3, 3, 4, 31, 31 +; CHECK-NEXT: srw 3, 3, 4 +; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %shl = shl i32 1, %y %and = and i32 %x, %shl @@ -43,9 +40,8 @@ define i1 @and_ncmp_variable_power_of_two_64(i64 %x, i64 %y) { ; CHECK-LABEL: and_ncmp_variable_power_of_two_64: ; CHECK: # %bb.0: -; CHECK-NEXT: not 3, 3 -; CHECK-NEXT: subfic 4, 4, 64 -; CHECK-NEXT: rldcl 3, 3, 4, 63 +; CHECK-NEXT: srd 3, 3, 4 +; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %shl = shl i64 1, %y %and = and i64 %x, 
%shl Index: llvm/trunk/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll =================================================================== --- llvm/trunk/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ llvm/trunk/test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -23,19 +23,18 @@ ; X86-LABEL: scalar_i8_signbit_eq: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movb $-128, %al -; X86-NEXT: shrb %cl, %al -; X86-NEXT: testb %al, {{[0-9]+}}(%esp) +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shlb %cl, %al +; X86-NEXT: testb $-128, %al ; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: scalar_i8_signbit_eq: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movb $-128, %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrb %cl, %al -; X64-NEXT: testb %dil, %al +; X64-NEXT: shlb %cl, %dil +; X64-NEXT: testb $-128, %dil ; X64-NEXT: sete %al ; X64-NEXT: retq %t0 = lshr i8 128, %y @@ -48,19 +47,18 @@ ; X86-LABEL: scalar_i8_lowestbit_eq: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movb $1, %al -; X86-NEXT: shrb %cl, %al -; X86-NEXT: testb %al, {{[0-9]+}}(%esp) +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shlb %cl, %al +; X86-NEXT: testb $1, %al ; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: scalar_i8_lowestbit_eq: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movb $1, %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrb %cl, %al -; X64-NEXT: testb %dil, %al +; X64-NEXT: shlb %cl, %dil +; X64-NEXT: testb $1, %dil ; X64-NEXT: sete %al ; X64-NEXT: retq %t0 = lshr i8 1, %y @@ -73,19 +71,18 @@ ; X86-LABEL: scalar_i8_bitsinmiddle_eq: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movb $24, %al -; X86-NEXT: shrb %cl, %al -; X86-NEXT: testb %al, {{[0-9]+}}(%esp) +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shlb %cl, %al +; X86-NEXT: testb $24, %al ; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: scalar_i8_bitsinmiddle_eq: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movb $24, %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrb %cl, %al -; X64-NEXT: testb %dil, %al +; X64-NEXT: shlb %cl, %dil +; X64-NEXT: testb $24, %dil ; X64-NEXT: sete %al ; X64-NEXT: retq %t0 = lshr i8 24, %y @@ -100,36 +97,33 @@ ; X86-BMI1-LABEL: scalar_i16_signbit_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $32768, %eax # imm = 0x8000 -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testl $32768, %eax # imm = 0x8000 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i16_signbit_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $32768, %ecx # imm = 0x8000 -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testl $32768, %eax # imm = 0x8000 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i16_signbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $32768, %eax # imm = 0x8000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shrl %cl, %eax -; X64-BMI1-NEXT: testw %di, %ax +; X64-BMI1-NEXT: shll %cl, %edi +; X64-BMI1-NEXT: testl $32768, %edi # imm = 0x8000 ; X64-BMI1-NEXT: sete %al ; 
X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i16_signbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $32768, %eax # imm = 0x8000 -; X64-BMI2-NEXT: shrxl %esi, %eax, %eax -; X64-BMI2-NEXT: testw %di, %ax +; X64-BMI2-NEXT: shlxl %esi, %edi, %eax +; X64-BMI2-NEXT: testl $32768, %eax # imm = 0x8000 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i16 32768, %y @@ -142,36 +136,33 @@ ; X86-BMI1-LABEL: scalar_i16_lowestbit_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $1, %eax -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $1, %al ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i16_lowestbit_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $1, %ecx -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testb $1, %al ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i16_lowestbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $1, %eax ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shrl %cl, %eax -; X64-BMI1-NEXT: testw %di, %ax +; X64-BMI1-NEXT: shll %cl, %edi +; X64-BMI1-NEXT: testb $1, %dil ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i16_lowestbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shrxl %esi, %eax, %eax -; X64-BMI2-NEXT: testw %di, %ax +; X64-BMI2-NEXT: shlxl %esi, %edi, %eax +; X64-BMI2-NEXT: testb $1, %al ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i16 1, %y @@ -184,36 +175,33 @@ ; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0 -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $4080, %ecx # imm = 0xFF0 -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i16_bitsinmiddle_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shrl %cl, %eax -; X64-BMI1-NEXT: testw %di, %ax +; X64-BMI1-NEXT: shll %cl, %edi +; X64-BMI1-NEXT: testl $4080, %edi # imm = 0xFF0 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i16_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $4080, %eax # imm = 0xFF0 -; X64-BMI2-NEXT: shrxl %esi, %eax, %eax -; X64-BMI2-NEXT: testw %di, %ax +; X64-BMI2-NEXT: shlxl %esi, %edi, %eax +; X64-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i16 4080, %y @@ -228,36 +216,33 @@ ; X86-BMI1-LABEL: scalar_i32_signbit_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; 
X86-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i32_signbit_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i32_signbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shrl %cl, %eax -; X64-BMI1-NEXT: testl %edi, %eax +; X64-BMI1-NEXT: shll %cl, %edi +; X64-BMI1-NEXT: testl $-2147483648, %edi # imm = 0x80000000 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i32_signbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X64-BMI2-NEXT: shrxl %esi, %eax, %eax -; X64-BMI2-NEXT: testl %edi, %eax +; X64-BMI2-NEXT: shlxl %esi, %edi, %eax +; X64-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i32 2147483648, %y @@ -270,36 +255,33 @@ ; X86-BMI1-LABEL: scalar_i32_lowestbit_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $1, %eax -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $1, %al ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i32_lowestbit_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $1, %ecx -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testb $1, %al ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i32_lowestbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $1, %eax ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shrl %cl, %eax -; X64-BMI1-NEXT: testl %edi, %eax +; X64-BMI1-NEXT: shll %cl, %edi +; X64-BMI1-NEXT: testb $1, %dil ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i32_lowestbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shrxl %esi, %eax, %eax -; X64-BMI2-NEXT: testl %edi, %eax +; X64-BMI2-NEXT: shlxl %esi, %edi, %eax +; X64-BMI2-NEXT: testb $1, %al ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i32 1, %y @@ -312,36 +294,33 @@ ; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00 -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb 
{{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $16776960, %ecx # imm = 0xFFFF00 -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i32_bitsinmiddle_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shrl %cl, %eax -; X64-BMI1-NEXT: testl %edi, %eax +; X64-BMI1-NEXT: shll %cl, %edi +; X64-BMI1-NEXT: testl $16776960, %edi # imm = 0xFFFF00 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i32_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $16776960, %eax # imm = 0xFFFF00 -; X64-BMI2-NEXT: shrxl %esi, %eax, %eax -; X64-BMI2-NEXT: testl %edi, %eax +; X64-BMI2-NEXT: shlxl %esi, %edi, %eax +; X64-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i32 16776960, %y @@ -357,55 +336,44 @@ ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-BMI1-NEXT: xorl %edx, %edx -; X86-BMI1-NEXT: xorl %esi, %esi -; X86-BMI1-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: shldl %cl, %eax, %edx ; X86-BMI1-NEXT: testb $32, %cl -; X86-BMI1-NEXT: cmovnel %eax, %esi -; X86-BMI1-NEXT: cmovnel %edx, %eax -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: orl %esi, %eax +; X86-BMI1-NEXT: cmovnel %esi, %edx +; X86-BMI1-NEXT: testl $-2147483648, %edx # imm = 0x80000000 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: popl %esi ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i64_signbit_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI2-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-BMI2-NEXT: xorl %edx, %edx -; X86-BMI2-NEXT: xorl %esi, %esi -; X86-BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI2-NEXT: testb $32, %cl -; X86-BMI2-NEXT: cmovnel %eax, %esi -; X86-BMI2-NEXT: cmovnel %edx, %eax -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: orl %esi, %eax +; X86-BMI2-NEXT: cmovel %edx, %eax +; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X86-BMI2-NEXT: sete %al -; X86-BMI2-NEXT: popl %esi ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i64_signbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movq %rsi, %rcx -; X64-BMI1-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1-NEXT: shrq %cl, %rax -; X64-BMI1-NEXT: testq %rdi, %rax +; X64-BMI1-NEXT: shlq %cl, %rdi +; X64-BMI1-NEXT: shrq $63, %rdi ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i64_signbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; X64-BMI2-NEXT: shrxq 
%rsi, %rax, %rax -; X64-BMI2-NEXT: testq %rdi, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: shrq $63, %rax ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i64 9223372036854775808, %y @@ -415,34 +383,42 @@ } define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { -; X86-LABEL: scalar_i64_lowestbit_eq: -; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: movl $1, %edx -; X86-NEXT: shrdl %cl, %eax, %edx -; X86-NEXT: testb $32, %cl -; X86-NEXT: cmovnel %eax, %edx -; X86-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl $0, %edx -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-BMI1-LABEL: scalar_i64_lowestbit_eq: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: cmovel %eax, %edx +; X86-BMI1-NEXT: testb $1, %dl +; X86-BMI1-NEXT: sete %al +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: scalar_i64_lowestbit_eq: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: cmovel %ecx, %edx +; X86-BMI2-NEXT: testb $1, %dl +; X86-BMI2-NEXT: sete %al +; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i64_lowestbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movq %rsi, %rcx -; X64-BMI1-NEXT: movl $1, %eax ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1-NEXT: shrq %cl, %rax -; X64-BMI1-NEXT: testq %rdi, %rax +; X64-BMI1-NEXT: shlq %cl, %rdi +; X64-BMI1-NEXT: testb $1, %dil ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i64_lowestbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax -; X64-BMI2-NEXT: testq %rdi, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: testb $1, %al ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i64 1, %y @@ -456,17 +432,18 @@ ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $65535, %eax # imm = 0xFFFF -; X86-BMI1-NEXT: movl $-65536, %edx # imm = 0xFFFF0000 -; X86-BMI1-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax ; X86-BMI1-NEXT: testb $32, %cl -; X86-BMI1-NEXT: cmovnel %eax, %edx -; X86-BMI1-NEXT: cmovel %eax, %esi -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1-NEXT: orl %edx, %esi +; X86-BMI1-NEXT: cmovnel %esi, %edx +; X86-BMI1-NEXT: movzwl %dx, %ecx +; X86-BMI1-NEXT: cmovel %esi, %eax +; X86-BMI1-NEXT: andl $-65536, %eax # imm = 0xFFFF0000 +; X86-BMI1-NEXT: orl %ecx, %eax ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: popl %esi ; X86-BMI1-NEXT: retl @@ -475,17 +452,17 @@ ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI2-NEXT: movl $65535, %eax # imm = 0xFFFF -; X86-BMI2-NEXT: movl $-65536, %edx # imm = 0xFFFF0000 -; X86-BMI2-NEXT: shrdl %cl, %eax, %edx -; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shldl %cl, 
%eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI2-NEXT: xorl %esi, %esi ; X86-BMI2-NEXT: testb $32, %cl ; X86-BMI2-NEXT: cmovnel %eax, %edx +; X86-BMI2-NEXT: movzwl %dx, %ecx ; X86-BMI2-NEXT: cmovel %eax, %esi -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: orl %edx, %esi +; X86-BMI2-NEXT: andl $-65536, %esi # imm = 0xFFFF0000 +; X86-BMI2-NEXT: orl %ecx, %esi ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: popl %esi ; X86-BMI2-NEXT: retl @@ -493,18 +470,18 @@ ; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movq %rsi, %rcx -; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1-NEXT: shrq %cl, %rax -; X64-BMI1-NEXT: testq %rdi, %rax +; X64-BMI1-NEXT: shlq %cl, %rdi +; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 +; X64-BMI1-NEXT: testq %rax, %rdi ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 -; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax -; X64-BMI2-NEXT: testq %rdi, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000 +; X64-BMI2-NEXT: testq %rcx, %rax ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i64 281474976645120, %y @@ -518,37 +495,48 @@ ;------------------------------------------------------------------------------; define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { -; SSE2-LABEL: vec_4xi32_splat_eq: -; SSE2: # %bb.0: -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7] -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1] -; SSE2-NEXT: movdqa %xmm3, %xmm4 -; SSE2-NEXT: psrld %xmm2, %xmm4 -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7] -; SSE2-NEXT: movdqa %xmm3, %xmm5 -; SSE2-NEXT: psrld %xmm2, %xmm5 -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7] -; SSE2-NEXT: movdqa %xmm3, %xmm4 -; SSE2-NEXT: psrld %xmm2, %xmm4 -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7] -; SSE2-NEXT: psrld %xmm1, %xmm3 -; SSE2-NEXT: punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1] -; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3] -; SSE2-NEXT: andps %xmm5, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE2-NEXT: ret{{[l|q]}} +; X86-SSE2-LABEL: vec_4xi32_splat_eq: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pxor %xmm2, %xmm2 +; X86-SSE2-NEXT: pslld $23, %xmm1 +; X86-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1 +; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X86-SSE2-NEXT: pmuludq %xmm3, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm0 +; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; X86-SSE2-NEXT: retl ; ; AVX2-LABEL: vec_4xi32_splat_eq: ; AVX2: # %bb.0: ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1] -; AVX2-NEXT: vpsrlvd %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor 
%xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT: ret{{[l|q]}}
+;
+; X64-SSE2-LABEL: vec_4xi32_splat_eq:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: pxor %xmm2, %xmm2
+; X64-SSE2-NEXT: pslld $23, %xmm1
+; X64-SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
+; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; X64-SSE2-NEXT: pmuludq %xmm1, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X64-SSE2-NEXT: pmuludq %xmm3, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; X64-SSE2-NEXT: retq
%t0 = lshr <4 x i32> <i32 1, i32 1, i32 1, i32 1>, %y
%t1 = and <4 x i32> %t0, %x
%res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
@@ -594,37 +582,42 @@
}
define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind {
-; SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
-; SSE2: # %bb.0:
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; SSE2-NEXT: movdqa {{.*#+}} xmm3 = <1,1,u,1>
-; SSE2-NEXT: movdqa %xmm3, %xmm4
-; SSE2-NEXT: psrld %xmm2, %xmm4
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm3, %xmm5
-; SSE2-NEXT: psrld %xmm2, %xmm5
-; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0]
-; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1]
-; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7]
-; SSE2-NEXT: movdqa %xmm3, %xmm4
-; SSE2-NEXT: psrld %xmm2, %xmm4
-; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7]
-; SSE2-NEXT: psrld %xmm1, %xmm3
-; SSE2-NEXT: punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1]
-; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3]
-; SSE2-NEXT: andps %xmm5, %xmm0
-; SSE2-NEXT: pxor %xmm1, %xmm1
-; SSE2-NEXT: pcmpeqd %xmm1, %xmm0
-; SSE2-NEXT: ret{{[l|q]}}
+; X86-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: pxor %xmm2, %xmm2
+; X86-SSE2-NEXT: pslld $23, %xmm1
+; X86-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1
+; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X86-SSE2-NEXT: pmuludq %xmm3, %xmm1
+; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm0
+; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; X86-SSE2-NEXT: retl
;
; AVX2-LABEL: vec_4xi32_nonsplat_undef0_eq:
; AVX2: # %bb.0:
; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1]
-; AVX2-NEXT: vpsrlvd %xmm1, %xmm2, %xmm1
-; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0
-; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0
+; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0
; AVX2-NEXT: ret{{[l|q]}}
+;
+; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq:
+; X64-SSE2: # %bb.0:
+; X64-SSE2-NEXT: pxor %xmm2, %xmm2
+; X64-SSE2-NEXT: pslld $23, %xmm1
+; X64-SSE2-NEXT: paddd {{.*}}(%rip), %xmm1
+; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3]
+; X64-SSE2-NEXT: pmuludq %xmm1, %xmm0
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
+; X64-SSE2-NEXT: pmuludq %xmm3, %xmm1
+; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X64-SSE2-NEXT: pand {{.*}}(%rip), %xmm0
+; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm0
+; X64-SSE2-NEXT: retq
%t0 = lshr <4 x i32> <i32 1, i32 1, i32 undef, i32 1>, %y
%t1 = and <4 x i32> %t0, %x
%res = icmp eq <4 x i32> %t1, <i32 0, i32 0, i32 0, i32 0>
@@ -713,20 +712,19 @@
; X86-LABEL: scalar_i8_signbit_ne:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb $-128, %al
-; X86-NEXT: shrb %cl, %al
-; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
-; X86-NEXT: setne %al
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shlb %cl, %al
+; X86-NEXT: shrb $7, %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_signbit_ne:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movb $-128, %al
+; X64-NEXT: movl %edi, %eax
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shrb %cl, %al
-; X64-NEXT: testb %dil, %al
-; X64-NEXT: setne %al
+; X64-NEXT: shlb %cl, %al
+; X64-NEXT: shrb $7, %al
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%t0 = lshr i8 128, %y
%t1 = and i8 %t0, %x
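The hunks above cover the lshr flavour of the tests, where the mask is a constant shifted right by a variable amount. The file below is the mirrored shl flavour: there the fold hoists the constant out of the left-shift by shifting %x right instead. As an illustration of the rewrite these checks pin down (a sketch only: the combine actually runs on SelectionDAG nodes, and the function names @before/@after are invented here for clarity, not part of the patch), the i8 'bitsinmiddle' case transforms as if:

define i1 @before(i8 %x, i8 %y) {
  ; (%x & (24 << %y)) == 0 -- the shifted mask occupies a scratch register
  %t0 = shl i8 24, %y
  %t1 = and i8 %t0, %x
  %res = icmp eq i8 %t1, 0
  ret i1 %res
}

define i1 @after(i8 %x, i8 %y) {
  ; ((%x >> %y) & 24) == 0 -- the constant becomes a test immediate
  %t0 = lshr i8 %x, %y
  %t1 = and i8 %t0, 24
  %res = icmp eq i8 %t1, 0
  ret i1 %res
}

This is why the updated CHECK lines below match 'shrb %cl, %al' followed by 'testb $24, %al' instead of first materializing $24 and shifting it.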
Index: llvm/trunk/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
===================================================================
--- llvm/trunk/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
+++ llvm/trunk/test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll
@@ -23,19 +23,18 @@
; X86-LABEL: scalar_i8_signbit_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb $-128, %al
-; X86-NEXT: shlb %cl, %al
-; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shrb %cl, %al
+; X86-NEXT: testb $-128, %al
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_signbit_eq:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movb $-128, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shlb %cl, %al
-; X64-NEXT: testb %dil, %al
+; X64-NEXT: shrb %cl, %dil
+; X64-NEXT: testb $-128, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
%t0 = shl i8 128, %y
@@ -68,19 +67,18 @@
; X86-LABEL: scalar_i8_bitsinmiddle_eq:
; X86: # %bb.0:
; X86-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-NEXT: movb $24, %al
-; X86-NEXT: shlb %cl, %al
-; X86-NEXT: testb %al, {{[0-9]+}}(%esp)
+; X86-NEXT: movb {{[0-9]+}}(%esp), %al
+; X86-NEXT: shrb %cl, %al
+; X86-NEXT: testb $24, %al
; X86-NEXT: sete %al
; X86-NEXT: retl
;
; X64-LABEL: scalar_i8_bitsinmiddle_eq:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %ecx
-; X64-NEXT: movb $24, %al
; X64-NEXT: # kill: def $cl killed $cl killed $ecx
-; X64-NEXT: shlb %cl, %al
-; X64-NEXT: testb %dil, %al
+; X64-NEXT: shrb %cl, %dil
+; X64-NEXT: testb $24, %dil
; X64-NEXT: sete %al
; X64-NEXT: retq
%t0 = shl i8 24, %y
@@ -95,36 +93,36 @@
; X86-BMI1-LABEL: scalar_i16_signbit_eq:
; X86-BMI1: # %bb.0:
; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl
-; X86-BMI1-NEXT: movl $-32768, %eax # imm = 0x8000
-; X86-BMI1-NEXT: shll %cl, %eax
-; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-BMI1-NEXT: shrl %cl, %eax
+; X86-BMI1-NEXT: testl $32768, %eax # imm = 0x8000
; X86-BMI1-NEXT: sete %al
; X86-BMI1-NEXT: retl
;
; X86-BMI2-LABEL: scalar_i16_signbit_eq:
; X86-BMI2: # %bb.0:
-; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al
-; X86-BMI2-NEXT: movl $-32768, %ecx # imm = 0x8000
-; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax
-; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp)
+; X86-BMI2-NEXT: movzwl 
{{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testl $32768, %eax # imm = 0x8000 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i16_signbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $-32768, %eax # imm = 0x8000 +; X64-BMI1-NEXT: movzwl %di, %eax ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shll %cl, %eax -; X64-BMI1-NEXT: testw %di, %ax +; X64-BMI1-NEXT: shrl %cl, %eax +; X64-BMI1-NEXT: testl $32768, %eax # imm = 0x8000 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i16_signbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $-32768, %eax # imm = 0x8000 -; X64-BMI2-NEXT: shlxl %esi, %eax, %eax -; X64-BMI2-NEXT: testw %di, %ax +; X64-BMI2-NEXT: movzwl %di, %eax +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: testl $32768, %eax # imm = 0x8000 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = shl i16 32768, %y @@ -157,36 +155,36 @@ ; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0 -; X86-BMI1-NEXT: shll %cl, %eax -; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $4080, %ecx # imm = 0xFF0 -; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i16_bitsinmiddle_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0 +; X64-BMI1-NEXT: movzwl %di, %eax ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shll %cl, %eax -; X64-BMI1-NEXT: testw %di, %ax +; X64-BMI1-NEXT: shrl %cl, %eax +; X64-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i16_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $4080, %eax # imm = 0xFF0 -; X64-BMI2-NEXT: shlxl %esi, %eax, %eax -; X64-BMI2-NEXT: testw %di, %ax +; X64-BMI2-NEXT: movzwl %di, %eax +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = shl i16 4080, %y @@ -201,36 +199,33 @@ ; X86-BMI1-LABEL: scalar_i32_signbit_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-BMI1-NEXT: shll %cl, %eax -; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i32_signbit_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), 
%eax +; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i32_signbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shll %cl, %eax -; X64-BMI1-NEXT: testl %edi, %eax +; X64-BMI1-NEXT: shrl %cl, %edi +; X64-BMI1-NEXT: testl $-2147483648, %edi # imm = 0x80000000 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i32_signbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X64-BMI2-NEXT: shlxl %esi, %eax, %eax -; X64-BMI2-NEXT: testl %edi, %eax +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = shl i32 2147483648, %y @@ -263,36 +258,33 @@ ; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00 -; X86-BMI1-NEXT: shll %cl, %eax -; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $16776960, %ecx # imm = 0xFFFF00 -; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i32_bitsinmiddle_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shll %cl, %eax -; X64-BMI1-NEXT: testl %edi, %eax +; X64-BMI1-NEXT: shrl %cl, %edi +; X64-BMI1-NEXT: testl $16776960, %edi # imm = 0xFFFF00 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i32_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $16776960, %eax # imm = 0xFFFF00 -; X64-BMI2-NEXT: shlxl %esi, %eax, %eax -; X64-BMI2-NEXT: testl %edi, %eax +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = shl i32 16776960, %y @@ -304,35 +296,43 @@ ; i64 scalar define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { -; X86-LABEL: scalar_i64_signbit_eq: -; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: movl $-2147483648, %edx # imm = 0x80000000 -; X86-NEXT: shldl %cl, %eax, %edx -; X86-NEXT: testb $32, %cl -; X86-NEXT: cmovnel %eax, %edx -; X86-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl $0, %edx -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-BMI1-LABEL: scalar_i64_signbit_eq: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: cmovel %eax, %edx +; X86-BMI1-NEXT: testl $-2147483648, %edx # imm = 0x80000000 +; X86-BMI1-NEXT: sete %al +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: scalar_i64_signbit_eq: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb 
{{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: cmovel %ecx, %edx +; X86-BMI2-NEXT: testl $-2147483648, %edx # imm = 0x80000000 +; X86-BMI2-NEXT: sete %al +; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i64_signbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movq %rsi, %rcx -; X64-BMI1-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1-NEXT: shlq %cl, %rax -; X64-BMI1-NEXT: testq %rdi, %rax -; X64-BMI1-NEXT: sete %al +; X64-BMI1-NEXT: shrq %cl, %rdi +; X64-BMI1-NEXT: btq $63, %rdi +; X64-BMI1-NEXT: setae %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i64_signbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax -; X64-BMI2-NEXT: testq %rdi, %rax -; X64-BMI2-NEXT: sete %al +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: btq $63, %rax +; X64-BMI2-NEXT: setae %al ; X64-BMI2-NEXT: retq %t0 = shl i64 9223372036854775808, %y %t1 = and i64 %t0, %x @@ -395,17 +395,18 @@ ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $-65536, %eax # imm = 0xFFFF0000 -; X86-BMI1-NEXT: movl $65535, %edx # imm = 0xFFFF -; X86-BMI1-NEXT: shldl %cl, %eax, %edx -; X86-BMI1-NEXT: shll %cl, %eax -; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: shrdl %cl, %edx, %eax +; X86-BMI1-NEXT: xorl %edx, %edx ; X86-BMI1-NEXT: testb $32, %cl -; X86-BMI1-NEXT: cmovnel %eax, %edx -; X86-BMI1-NEXT: cmovel %eax, %esi -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1-NEXT: orl %edx, %esi +; X86-BMI1-NEXT: cmovnel %esi, %eax +; X86-BMI1-NEXT: cmovel %esi, %edx +; X86-BMI1-NEXT: andl $-65536, %eax # imm = 0xFFFF0000 +; X86-BMI1-NEXT: movzwl %dx, %ecx +; X86-BMI1-NEXT: orl %eax, %ecx ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: popl %esi ; X86-BMI1-NEXT: retl @@ -414,17 +415,17 @@ ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI2-NEXT: movl $-65536, %eax # imm = 0xFFFF0000 -; X86-BMI2-NEXT: movl $65535, %edx # imm = 0xFFFF -; X86-BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx ; X86-BMI2-NEXT: xorl %esi, %esi ; X86-BMI2-NEXT: testb $32, %cl -; X86-BMI2-NEXT: cmovnel %eax, %edx -; X86-BMI2-NEXT: cmovel %eax, %esi -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: orl %edx, %esi +; X86-BMI2-NEXT: cmovnel %edx, %eax +; X86-BMI2-NEXT: cmovel %edx, %esi +; X86-BMI2-NEXT: andl $-65536, %eax # imm = 0xFFFF0000 +; X86-BMI2-NEXT: movzwl %si, %ecx +; X86-BMI2-NEXT: orl %eax, %ecx ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: popl %esi ; X86-BMI2-NEXT: retl @@ -432,18 +433,18 @@ ; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movq %rsi, %rcx -; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1-NEXT: shlq %cl, %rax -; X64-BMI1-NEXT: testq %rdi, 
%rax +; X64-BMI1-NEXT: shrq %cl, %rdi +; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 +; X64-BMI1-NEXT: testq %rax, %rdi ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 -; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax -; X64-BMI2-NEXT: testq %rdi, %rax +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000 +; X64-BMI2-NEXT: testq %rcx, %rax ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = shl i64 281474976645120, %y @@ -477,10 +478,10 @@ ; AVX2-LABEL: vec_4xi32_splat_eq: ; AVX2: # %bb.0: ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1] -; AVX2-NEXT: vpsllvd %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 ; AVX2-NEXT: ret{{[l|q]}} ; ; X64-SSE2-LABEL: vec_4xi32_splat_eq: @@ -577,10 +578,10 @@ ; AVX2-LABEL: vec_4xi32_nonsplat_undef0_eq: ; AVX2: # %bb.0: ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1] -; AVX2-NEXT: vpsllvd %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 ; AVX2-NEXT: ret{{[l|q]}} ; ; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq: @@ -709,20 +710,19 @@ ; X86-LABEL: scalar_i8_signbit_ne: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movb $-128, %al -; X86-NEXT: shlb %cl, %al -; X86-NEXT: testb %al, {{[0-9]+}}(%esp) -; X86-NEXT: setne %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shrb %cl, %al +; X86-NEXT: shrb $7, %al ; X86-NEXT: retl ; ; X64-LABEL: scalar_i8_signbit_ne: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movb $-128, %al +; X64-NEXT: movl %edi, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shlb %cl, %al -; X64-NEXT: testb %dil, %al -; X64-NEXT: setne %al +; X64-NEXT: shrb %cl, %al +; X64-NEXT: shrb $7, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = shl i8 128, %y %t1 = and i8 %t0, %x