Index: include/llvm/CodeGen/TargetLowering.h =================================================================== --- include/llvm/CodeGen/TargetLowering.h +++ include/llvm/CodeGen/TargetLowering.h @@ -534,6 +534,10 @@ return hasAndNotCompare(X); } + /// Return true if the target has a bit-test instruction: + /// (X & (1 << Y)) ==/!= 0 + virtual bool hasBitTest(SDValue X, SDValue Y) const { return false; } + /// There are two ways to clear extreme bits (either low or high): /// Mask: x & (-1 << y) (the instcombine canonical form) /// Shifts: x >> y << y @@ -566,6 +570,35 @@ return false; } + /// Given the pattern + /// (X & (C l>>/<< Y)) ==/!= 0 + /// return true if it should be transformed into: + /// ((X <> Y) & C) ==/!= 0 + /// WARNING: if 'X' is a constant, the fold may deadlock! + virtual bool shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + SDValue X, ConstantSDNode *XC, ConstantSDNode *CC, SDValue Y, + unsigned OldShiftOpcode, unsigned NewShiftOpcode) const { + if (hasBitTest(X, Y)) { + // One interesting pattern that we'd want to form is 'bit test': + // ((1 << Y) & C) ==/!= 0 + // But we also need to be careful not to try to reverse that fold. + + // Is this '1 << Y' ? + if (OldShiftOpcode == ISD::SHL && CC->isOne()) + return false; // Keep the 'bit test' pattern. + + // Will it be '1 << Y' after the transform ? + if (XC && NewShiftOpcode == ISD::SHL && XC->isOne()) + return true; // Do form the 'bit test' pattern. + } + + // If 'X' is a constant, and we transform, then we will immediately + // try to undo the fold, thus causing an endless combine loop. + // So by default, let's assume everyone prefers the fold + // iff 'X' is not a constant. + return !XC; + } + /// Return true if the target wants to use the optimization that /// turns ext(promotableInst1(...(promotableInstN(load)))) into /// promotedInst1(...(promotedInstN(ext(load)))). 
@@ -4055,6 +4088,11 @@ SDValue N1, ISD::CondCode Cond, DAGCombinerInfo &DCI, const SDLoc &DL) const; + + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <> Y) & C) ==/!= 0 + SDValue optimizeSetCCByHoistingAndByConstFromLogicalShift( + EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL) const; }; /// Given an LLVM IR type and return type attributes, compute the return value Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp =================================================================== --- lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -2577,6 +2577,77 @@ return T2; } +// (X & (C l>>/<< Y)) ==/!= 0 --> ((X <> Y) & C) ==/!= 0 +SDValue TargetLowering::optimizeSetCCByHoistingAndByConstFromLogicalShift( + EVT SCCVT, SDValue N0, SDValue N1C, ISD::CondCode Cond, + DAGCombinerInfo &DCI, const SDLoc &DL) const { + assert(isConstOrConstSplat(N1C) && + isConstOrConstSplat(N1C)->getAPIntValue().isNullValue() && + "Should be a comparison with 0."); + assert((Cond == ISD::SETEQ || Cond == ISD::SETNE) && + "Valid only for [in]equality comparisons."); + + unsigned NewShiftOpcode; + SDValue X, C, Y; + + SelectionDAG &DAG = DCI.DAG; + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Look for '(C l>>/<< Y)'. + auto Match = [&NewShiftOpcode, &X, &C, &Y, &TLI](SDValue V) { + // The shift should be one-use. + if (!V.hasOneUse()) + return false; + unsigned OldShiftOpcode = V.getOpcode(); + switch (OldShiftOpcode) { + case ISD::SHL: + NewShiftOpcode = ISD::SRL; + break; + case ISD::SRL: + NewShiftOpcode = ISD::SHL; + break; + default: + return false; // must be a logical shift. + } + // We should be shifting a constant. + // FIXME: best to use isConstantOrConstantVector(). 
+ C = V.getOperand(0); + ConstantSDNode *CC = + isConstOrConstSplat(C, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + if (!CC) + return false; + Y = V.getOperand(1); + + ConstantSDNode *XC = + isConstOrConstSplat(X, /*AllowUndefs=*/true, /*AllowTruncation=*/true); + return TLI.shouldProduceAndByConstByHoistingConstFromShiftsLHSOfAnd( + X, XC, CC, Y, OldShiftOpcode, NewShiftOpcode); + }; + + // LHS of comparison should be a one-use 'and'. + if (N0.getOpcode() != ISD::AND || !N0.hasOneUse()) + return SDValue(); + + X = N0.getOperand(0); + SDValue Mask = N0.getOperand(1); + + // 'and' is commutative! + if (!Match(Mask)) { + std::swap(X, Mask); + if (!Match(Mask)) + return SDValue(); + } + + EVT VT = X.getValueType(); + + // Produce: + // ((X 'OppositeShiftOpcode' Y) & C) Cond 0 + SDValue T0 = DAG.getNode(NewShiftOpcode, DL, VT, X, Y); + SDValue T1 = DAG.getNode(ISD::AND, DL, VT, T0, C); + SDValue T2 = DAG.getSetCC(DL, SCCVT, T1, N1C, Cond); + return T2; +} + /// Try to fold an equality comparison with a {add/sub/xor} binary operation as /// the 1st operand (N0). Callers are expected to swap the N0/N1 parameters to /// handle the commuted versions of these patterns. @@ -3108,6 +3179,14 @@ } } + if (Cond == ISD::SETEQ || Cond == ISD::SETNE) { + // (X & (C l>>/<< Y)) ==/!= 0 --> ((X <> Y) & C) ==/!= 0 + if (C1.isNullValue()) + if (SDValue CC = optimizeSetCCByHoistingAndByConstFromLogicalShift( + VT, N0, N1, Cond, DCI, dl)) + return CC; + } + // If we have "setcc X, C0", check to see if we can shrink the immediate // by changing cc. // TODO: Support this for vectors after legalize ops. 
Index: lib/Target/Hexagon/HexagonISelLowering.h =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.h +++ lib/Target/Hexagon/HexagonISelLowering.h @@ -127,6 +127,8 @@ bool isCheapToSpeculateCtlz() const override { return true; } bool isCtlzFast() const override { return true; } + bool hasBitTest(SDValue X, SDValue Y) const override; + bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const override; /// Return true if an FMA operation is faster than a pair of mul and add Index: lib/Target/Hexagon/HexagonISelLowering.cpp =================================================================== --- lib/Target/Hexagon/HexagonISelLowering.cpp +++ lib/Target/Hexagon/HexagonISelLowering.cpp @@ -1821,6 +1821,10 @@ return isTruncateFree(EVT::getEVT(Ty1), EVT::getEVT(Ty2)); } +bool HexagonTargetLowering::hasBitTest(SDValue X, SDValue Y) const { + return X.getValueType().isScalarInteger(); // 'tstbit' +} + bool HexagonTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const { if (!VT1.isSimple() || !VT2.isSimple()) return false; Index: lib/Target/X86/X86ISelLowering.h =================================================================== --- lib/Target/X86/X86ISelLowering.h +++ lib/Target/X86/X86ISelLowering.h @@ -840,6 +840,8 @@ bool hasAndNot(SDValue Y) const override; + bool hasBitTest(SDValue X, SDValue Y) const override; + bool shouldFoldConstantShiftPairToMask(const SDNode *N, CombineLevel Level) const override; Index: lib/Target/X86/X86ISelLowering.cpp =================================================================== --- lib/Target/X86/X86ISelLowering.cpp +++ lib/Target/X86/X86ISelLowering.cpp @@ -4969,6 +4969,10 @@ return Subtarget.hasSSE2(); } +bool X86TargetLowering::hasBitTest(SDValue X, SDValue Y) const { + return X.getValueType().isScalarInteger(); // 'bt' +} + bool X86TargetLowering::shouldFoldConstantShiftPairToMask( const SDNode *N, CombineLevel Level) const { assert(((N->getOpcode() == ISD::SHL && Index: 
test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll =================================================================== --- test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ test/CodeGen/AArch64/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -15,11 +15,9 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x80 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i8 128, %y @@ -31,11 +29,9 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i8 1, %y @@ -47,11 +43,9 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #24 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x18 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i8 24, %y @@ -65,11 +59,9 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: scalar_i16_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #32768 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xffff +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x8000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i16 32768, %y @@ 
-81,11 +73,9 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: scalar_i16_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xffff +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i16 1, %y @@ -97,11 +87,9 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: scalar_i16_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4080 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xffff +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0xff0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i16 4080, %y @@ -115,9 +103,8 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: tst w8, w0 +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x80000000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i32 2147483648, %y @@ -129,9 +116,8 @@ define i1 @scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: tst w8, w0 +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i32 1, %y @@ -143,9 +129,8 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16776960 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: tst w8, w0 +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: tst w8, #0xffff00 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i32 16776960, %y @@ -159,9 +144,8 @@ define i1 
@scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scalar_i64_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 -; CHECK-NEXT: lsr x8, x8, x1 -; CHECK-NEXT: tst x8, x0 +; CHECK-NEXT: lsl x8, x0, x1 +; CHECK-NEXT: tst x8, #0x8000000000000000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i64 9223372036854775808, %y @@ -173,9 +157,8 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scalar_i64_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsr x8, x8, x1 -; CHECK-NEXT: tst x8, x0 +; CHECK-NEXT: lsl x8, x0, x1 +; CHECK-NEXT: tst x8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i64 1, %y @@ -187,9 +170,8 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scalar_i64_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #281474976645120 -; CHECK-NEXT: lsr x8, x8, x1 -; CHECK-NEXT: tst x8, x0 +; CHECK-NEXT: lsl x8, x0, x1 +; CHECK-NEXT: tst x8, #0xffffffff0000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = lshr i64 281474976645120, %y @@ -205,10 +187,9 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_splat_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: neg v1.4s, v1.4s ; CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -238,10 +219,9 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: neg v1.4s, v1.4s ; CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; 
CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -288,12 +268,9 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_signbit_ne: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #128 ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsr w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: lsl w8, w0, w1 +; CHECK-NEXT: ubfx w0, w8, #7, #1 ; CHECK-NEXT: ret %t0 = lshr i8 128, %y %t1 = and i8 %t0, %x Index: test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll =================================================================== --- test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ test/CodeGen/AArch64/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -15,11 +15,10 @@ define i1 @scalar_i8_signbit_eq(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-128 +; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: tst w8, #0x80 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i8 128, %y @@ -31,11 +30,10 @@ define i1 @scalar_i8_lowestbit_eq(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i8 1, %y @@ -47,11 +45,10 @@ define i1 @scalar_i8_bitsinmiddle_eq(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #24 +; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and 
w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: tst w8, #0x18 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i8 24, %y @@ -65,11 +62,10 @@ define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: scalar_i16_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-32768 +; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xffff +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: tst w8, #0x8000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i16 32768, %y @@ -81,11 +77,10 @@ define i1 @scalar_i16_lowestbit_eq(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: scalar_i16_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 +; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xffff +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i16 1, %y @@ -97,11 +92,10 @@ define i1 @scalar_i16_bitsinmiddle_eq(i16 %x, i16 %y) nounwind { ; CHECK-LABEL: scalar_i16_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #4080 +; CHECK-NEXT: and w8, w0, #0xffff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xffff +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: tst w8, #0xff0 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i16 4080, %y @@ -115,9 +109,8 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-2147483648 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: tst w8, w0 +; CHECK-NEXT: lsr w8, w0, w1 +; CHECK-NEXT: tst w8, #0x80000000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i32 2147483648, %y @@ -129,9 +122,8 @@ define i1 
@scalar_i32_lowestbit_eq(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: tst w8, w0 +; CHECK-NEXT: lsr w8, w0, w1 +; CHECK-NEXT: tst w8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i32 1, %y @@ -143,9 +135,8 @@ define i1 @scalar_i32_bitsinmiddle_eq(i32 %x, i32 %y) nounwind { ; CHECK-LABEL: scalar_i32_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #16776960 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: tst w8, w0 +; CHECK-NEXT: lsr w8, w0, w1 +; CHECK-NEXT: tst w8, #0xffff00 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i32 16776960, %y @@ -159,9 +150,8 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scalar_i64_signbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #-9223372036854775808 -; CHECK-NEXT: lsl x8, x8, x1 -; CHECK-NEXT: tst x8, x0 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: tst x8, #0x8000000000000000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i64 9223372036854775808, %y @@ -173,9 +163,8 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scalar_i64_lowestbit_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #1 -; CHECK-NEXT: lsl x8, x8, x1 -; CHECK-NEXT: tst x8, x0 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: tst x8, #0x1 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i64 1, %y @@ -187,9 +176,8 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; CHECK-LABEL: scalar_i64_bitsinmiddle_eq: ; CHECK: // %bb.0: -; CHECK-NEXT: mov x8, #281474976645120 -; CHECK-NEXT: lsl x8, x8, x1 -; CHECK-NEXT: tst x8, x0 +; CHECK-NEXT: lsr x8, x0, x1 +; CHECK-NEXT: tst x8, #0xffffffff0000 ; CHECK-NEXT: cset w0, eq ; CHECK-NEXT: ret %t0 = shl i64 281474976645120, %y @@ -205,9 +193,10 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_splat_eq: ; CHECK: // %bb.0: +; CHECK-NEXT: neg v1.4s, v1.4s ; 
CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -236,9 +225,10 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; CHECK-LABEL: vec_4xi32_nonsplat_undef0_eq: ; CHECK: // %bb.0: +; CHECK-NEXT: neg v1.4s, v1.4s ; CHECK-NEXT: movi v2.4s, #1 -; CHECK-NEXT: ushl v1.4s, v2.4s, v1.4s -; CHECK-NEXT: and v0.16b, v1.16b, v0.16b +; CHECK-NEXT: ushl v0.4s, v0.4s, v1.4s +; CHECK-NEXT: and v0.16b, v0.16b, v2.16b ; CHECK-NEXT: cmeq v0.4s, v0.4s, #0 ; CHECK-NEXT: xtn v0.4h, v0.4s ; CHECK-NEXT: ret @@ -283,12 +273,10 @@ define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { ; CHECK-LABEL: scalar_i8_signbit_ne: ; CHECK: // %bb.0: -; CHECK-NEXT: mov w8, #-128 +; CHECK-NEXT: and w8, w0, #0xff ; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK-NEXT: lsl w8, w8, w1 -; CHECK-NEXT: and w8, w8, w0 -; CHECK-NEXT: tst w8, #0xff -; CHECK-NEXT: cset w0, ne +; CHECK-NEXT: lsr w8, w8, w1 +; CHECK-NEXT: ubfx w0, w8, #7, #1 ; CHECK-NEXT: ret %t0 = shl i8 128, %y %t1 = and i8 %t0, %x Index: test/CodeGen/AMDGPU/commute-shifts.ll =================================================================== --- test/CodeGen/AMDGPU/commute-shifts.ll +++ test/CodeGen/AMDGPU/commute-shifts.ll @@ -16,10 +16,10 @@ ; SI-NEXT: s_mov_b32 s7, s0 ; SI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm ; SI-NEXT: v_and_b32_e32 v0, 7, v0 -; SI-NEXT: v_lshl_b32_e32 v0, 1, v0 ; SI-NEXT: s_waitcnt vmcnt(0) -; SI-NEXT: v_and_b32_e32 v0, v2, v0 -; SI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; SI-NEXT: v_lshrrev_b32_e32 v0, v0, v2 +; SI-NEXT: v_and_b32_e32 v0, 1, v0 +; SI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; SI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; SI-NEXT: v_cvt_pkrtz_f16_f32_e32 v0, s0, v0 ; SI-NEXT: ; return to shader part epilog @@ -37,10 +37,10 @@ ; 
VI-NEXT: s_mov_b32 s7, s0 ; VI-NEXT: image_load v2, v0, s[0:7] dmask:0x1 unorm ; VI-NEXT: v_and_b32_e32 v0, 7, v0 -; VI-NEXT: v_lshlrev_b32_e64 v0, v0, 1 ; VI-NEXT: s_waitcnt vmcnt(0) -; VI-NEXT: v_and_b32_e32 v0, v2, v0 -; VI-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0 +; VI-NEXT: v_lshrrev_b32_e32 v0, v0, v2 +; VI-NEXT: v_and_b32_e32 v0, 1, v0 +; VI-NEXT: v_cmp_eq_u32_e32 vcc, 1, v0 ; VI-NEXT: v_cndmask_b32_e32 v0, 0, v1, vcc ; VI-NEXT: v_cvt_pkrtz_f16_f32 v0, s0, v0 ; VI-NEXT: ; return to shader part epilog Index: test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll =================================================================== --- test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ test/CodeGen/ARM/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -21,20 +21,18 @@ ; ARM-LABEL: scalar_i8_signbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 -; ARM-NEXT: mov r2, #128 -; ARM-NEXT: and r0, r0, r2, lsr r1 +; ARM-NEXT: lsl r0, r0, r1 +; ARM-NEXT: mov r1, #1 ; ARM-NEXT: uxtb r0, r0 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: eor r0, r1, r0, lsr #7 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_signbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #128 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r1, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #128 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr @@ -42,12 +40,10 @@ ; THUMB78-LABEL: scalar_i8_signbit_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #128 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: movs r1, #1 ; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: eor.w r0, r1, r0, lsr #7 ; THUMB78-NEXT: bx lr %t0 = lshr i8 128, %y %t1 = and i8 %t0, %x @@ -60,19 +56,15 @@ ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 ; 
ARM-NEXT: mov r2, #1 -; ARM-NEXT: and r0, r0, r2, lsr r1 -; ARM-NEXT: uxtb r0, r0 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: bic r0, r2, r0, lsl r1 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_lowestbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r1, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr @@ -80,12 +72,9 @@ ; THUMB78-LABEL: scalar_i8_lowestbit_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #1 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: movs r1, #1 +; THUMB78-NEXT: bic.w r0, r1, r0 ; THUMB78-NEXT: bx lr %t0 = lshr i8 1, %y %t1 = and i8 %t0, %x @@ -98,8 +87,7 @@ ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 ; ARM-NEXT: mov r2, #24 -; ARM-NEXT: and r0, r0, r2, lsr r1 -; ARM-NEXT: uxtb r0, r0 +; ARM-NEXT: and r0, r2, r0, lsl r1 ; ARM-NEXT: clz r0, r0 ; ARM-NEXT: lsr r0, r0, #5 ; ARM-NEXT: bx lr @@ -107,10 +95,9 @@ ; THUMB6-LABEL: scalar_i8_bitsinmiddle_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #24 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r1, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #24 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr @@ -118,10 +105,8 @@ ; THUMB78-LABEL: scalar_i8_bitsinmiddle_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #24 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: and r0, r0, #24 ; THUMB78-NEXT: clz r0, r0 ; THUMB78-NEXT: lsrs r0, r0, #5 ; 
THUMB78-NEXT: bx lr @@ -137,21 +122,19 @@ ; ARM-LABEL: scalar_i16_signbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxth r1, r1 -; ARM-NEXT: mov r2, #32768 -; ARM-NEXT: and r0, r0, r2, lsr r1 +; ARM-NEXT: lsl r0, r0, r1 +; ARM-NEXT: mov r1, #1 ; ARM-NEXT: uxth r0, r0 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: eor r0, r1, r0, lsr #15 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i16_signbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxth r1, r1 -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsls r2, r2, #15 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxth r1, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: lsls r1, r1, #15 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr @@ -159,12 +142,10 @@ ; THUMB78-LABEL: scalar_i16_signbit_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxth r1, r1 -; THUMB78-NEXT: mov.w r2, #32768 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: movs r1, #1 ; THUMB78-NEXT: uxth r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: eor.w r0, r1, r0, lsr #15 ; THUMB78-NEXT: bx lr %t0 = lshr i16 32768, %y %t1 = and i16 %t0, %x @@ -177,19 +158,15 @@ ; ARM: @ %bb.0: ; ARM-NEXT: uxth r1, r1 ; ARM-NEXT: mov r2, #1 -; ARM-NEXT: and r0, r0, r2, lsr r1 -; ARM-NEXT: uxth r0, r0 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: bic r0, r2, r0, lsl r1 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i16_lowestbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxth r1, r1 -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxth r1, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr @@ -197,12 +174,9 @@ ; THUMB78-LABEL: scalar_i16_lowestbit_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxth r1, r1 -; 
THUMB78-NEXT: movs r2, #1 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxth r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: movs r1, #1 +; THUMB78-NEXT: bic.w r0, r1, r0 ; THUMB78-NEXT: bx lr %t0 = lshr i16 1, %y %t1 = and i16 %t0, %x @@ -215,8 +189,7 @@ ; ARM: @ %bb.0: ; ARM-NEXT: uxth r1, r1 ; ARM-NEXT: mov r2, #4080 -; ARM-NEXT: and r0, r0, r2, lsr r1 -; ARM-NEXT: uxth r0, r0 +; ARM-NEXT: and r0, r2, r0, lsl r1 ; ARM-NEXT: clz r0, r0 ; ARM-NEXT: lsr r0, r0, #5 ; ARM-NEXT: bx lr @@ -224,11 +197,10 @@ ; THUMB6-LABEL: scalar_i16_bitsinmiddle_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxth r1, r1 -; THUMB6-NEXT: movs r2, #255 -; THUMB6-NEXT: lsls r2, r2, #4 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxth r1, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #255 +; THUMB6-NEXT: lsls r1, r1, #4 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr @@ -236,10 +208,8 @@ ; THUMB78-LABEL: scalar_i16_bitsinmiddle_eq: ; THUMB78: @ %bb.0: ; THUMB78-NEXT: uxth r1, r1 -; THUMB78-NEXT: mov.w r2, #4080 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxth r0, r0 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: and r0, r0, #4080 ; THUMB78-NEXT: clz r0, r0 ; THUMB78-NEXT: lsrs r0, r0, #5 ; THUMB78-NEXT: bx lr @@ -254,29 +224,25 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { ; ARM-LABEL: scalar_i32_signbit_eq: ; ARM: @ %bb.0: -; ARM-NEXT: mov r2, #-2147483648 -; ARM-NEXT: and r0, r0, r2, lsr r1 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: mvn r0, r0, lsl r1 +; ARM-NEXT: lsr r0, r0, #31 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i32_signbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsls r2, r2, #31 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: rsbs r0, r2, #0 -; THUMB6-NEXT: adcs r0, r2 
+; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: lsls r1, r1, #31 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 +; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; ; THUMB78-LABEL: scalar_i32_signbit_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: mov.w r2, #-2147483648 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: mvns r0, r0 +; THUMB78-NEXT: lsrs r0, r0, #31 ; THUMB78-NEXT: bx lr %t0 = lshr i32 2147483648, %y %t1 = and i32 %t0, %x @@ -288,27 +254,23 @@ ; ARM-LABEL: scalar_i32_lowestbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: mov r2, #1 -; ARM-NEXT: and r0, r0, r2, lsr r1 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: bic r0, r2, r0, lsl r1 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i32_lowestbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: rsbs r0, r2, #0 -; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 +; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; ; THUMB78-LABEL: scalar_i32_lowestbit_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: movs r2, #1 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: movs r1, #1 +; THUMB78-NEXT: bic.w r0, r1, r0 ; THUMB78-NEXT: bx lr %t0 = lshr i32 1, %y %t1 = and i32 %t0, %x @@ -321,7 +283,7 @@ ; ARM6: @ %bb.0: ; ARM6-NEXT: mov r2, #65280 ; ARM6-NEXT: orr r2, r2, #16711680 -; ARM6-NEXT: and r0, r0, r2, lsr r1 +; ARM6-NEXT: and r0, r2, r0, lsl r1 ; ARM6-NEXT: clz r0, r0 ; ARM6-NEXT: lsr r0, r0, #5 ; ARM6-NEXT: bx lr @@ -330,18 +292,18 @@ ; ARM78: @ %bb.0: ; ARM78-NEXT: movw r2, #65280 ; ARM78-NEXT: movt r2, #255 -; ARM78-NEXT: and r0, r0, r2, lsr r1 +; ARM78-NEXT: and r0, r2, r0, lsl r1 ; 
ARM78-NEXT: clz r0, r0 ; ARM78-NEXT: lsr r0, r0, #5 ; ARM78-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i32_bitsinmiddle_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: ldr r2, .LCPI8_0 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: rsbs r0, r2, #0 -; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: lsls r0, r1 +; THUMB6-NEXT: ldr r1, .LCPI8_0 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 +; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; THUMB6-NEXT: .p2align 2 ; THUMB6-NEXT: @ %bb.1: @@ -350,9 +312,9 @@ ; ; THUMB78-LABEL: scalar_i32_bitsinmiddle_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: movw r2, #65280 -; THUMB78-NEXT: movt r2, #255 -; THUMB78-NEXT: lsr.w r1, r2, r1 +; THUMB78-NEXT: lsls r0, r1 +; THUMB78-NEXT: movw r1, #65280 +; THUMB78-NEXT: movt r1, #255 ; THUMB78-NEXT: ands r0, r1 ; THUMB78-NEXT: clz r0, r0 ; THUMB78-NEXT: lsrs r0, r0, #5 @@ -366,98 +328,42 @@ ; i64 scalar define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { -; ARM6-LABEL: scalar_i64_signbit_eq: -; ARM6: @ %bb.0: -; ARM6-NEXT: push {r11, lr} -; ARM6-NEXT: mov r12, #-2147483648 -; ARM6-NEXT: sub lr, r2, #32 -; ARM6-NEXT: lsr r3, r12, r2 -; ARM6-NEXT: rsb r2, r2, #32 -; ARM6-NEXT: cmp lr, #0 -; ARM6-NEXT: lsl r2, r12, r2 -; ARM6-NEXT: movge r3, #0 -; ARM6-NEXT: lsrge r2, r12, lr -; ARM6-NEXT: and r1, r3, r1 -; ARM6-NEXT: and r0, r2, r0 -; ARM6-NEXT: orr r0, r0, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: pop {r11, pc} -; -; ARM78-LABEL: scalar_i64_signbit_eq: -; ARM78: @ %bb.0: -; ARM78-NEXT: push {r11, lr} -; ARM78-NEXT: mov r12, #-2147483648 -; ARM78-NEXT: sub lr, r2, #32 -; ARM78-NEXT: lsr r3, r12, r2 -; ARM78-NEXT: rsb r2, r2, #32 -; ARM78-NEXT: cmp lr, #0 -; ARM78-NEXT: lsl r2, r12, r2 -; ARM78-NEXT: movwge r3, #0 -; ARM78-NEXT: lsrge r2, r12, lr -; ARM78-NEXT: and r1, r3, r1 -; ARM78-NEXT: and r0, r2, r0 -; ARM78-NEXT: orr r0, r0, r1 -; ARM78-NEXT: clz r0, r0 -; ARM78-NEXT: lsr r0, r0, #5 -; ARM78-NEXT: pop {r11, pc} +; ARM-LABEL: 
scalar_i64_signbit_eq: +; ARM: @ %bb.0: +; ARM-NEXT: rsb r3, r2, #32 +; ARM-NEXT: lsr r3, r0, r3 +; ARM-NEXT: orr r1, r3, r1, lsl r2 +; ARM-NEXT: sub r2, r2, #32 +; ARM-NEXT: cmp r2, #0 +; ARM-NEXT: lslge r1, r0, r2 +; ARM-NEXT: mvn r0, r1 +; ARM-NEXT: lsr r0, r0, #31 +; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i64_signbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: mov r4, r1 -; THUMB6-NEXT: mov r5, r0 +; THUMB6-NEXT: push {r7, lr} +; THUMB6-NEXT: bl __ashldi3 ; THUMB6-NEXT: movs r0, #1 -; THUMB6-NEXT: lsls r1, r0, #31 -; THUMB6-NEXT: movs r0, #0 -; THUMB6-NEXT: bl __lshrdi3 -; THUMB6-NEXT: ands r1, r4 -; THUMB6-NEXT: ands r0, r5 -; THUMB6-NEXT: orrs r0, r1 -; THUMB6-NEXT: rsbs r1, r0, #0 -; THUMB6-NEXT: adcs r0, r1 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: lsls r2, r0, #31 +; THUMB6-NEXT: ands r2, r1 +; THUMB6-NEXT: rsbs r0, r2, #0 +; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: pop {r7, pc} ; -; THUMB7-LABEL: scalar_i64_signbit_eq: -; THUMB7: @ %bb.0: -; THUMB7-NEXT: push {r7, lr} -; THUMB7-NEXT: rsb.w r3, r2, #32 -; THUMB7-NEXT: mov.w r12, #-2147483648 -; THUMB7-NEXT: sub.w lr, r2, #32 -; THUMB7-NEXT: lsr.w r2, r12, r2 -; THUMB7-NEXT: lsl.w r3, r12, r3 -; THUMB7-NEXT: cmp.w lr, #0 -; THUMB7-NEXT: it ge -; THUMB7-NEXT: lsrge.w r3, r12, lr -; THUMB7-NEXT: it ge -; THUMB7-NEXT: movge r2, #0 -; THUMB7-NEXT: ands r0, r3 -; THUMB7-NEXT: ands r1, r2 -; THUMB7-NEXT: orrs r0, r1 -; THUMB7-NEXT: clz r0, r0 -; THUMB7-NEXT: lsrs r0, r0, #5 -; THUMB7-NEXT: pop {r7, pc} -; -; THUMB8-LABEL: scalar_i64_signbit_eq: -; THUMB8: @ %bb.0: -; THUMB8-NEXT: .save {r7, lr} -; THUMB8-NEXT: push {r7, lr} -; THUMB8-NEXT: rsb.w r3, r2, #32 -; THUMB8-NEXT: sub.w lr, r2, #32 -; THUMB8-NEXT: mov.w r12, #-2147483648 -; THUMB8-NEXT: cmp.w lr, #0 -; THUMB8-NEXT: lsl.w r3, r12, r3 -; THUMB8-NEXT: lsr.w r2, r12, r2 -; THUMB8-NEXT: it ge -; THUMB8-NEXT: lsrge.w r3, r12, lr -; THUMB8-NEXT: it ge -; THUMB8-NEXT: movge r2, #0 -; THUMB8-NEXT: ands r0, r3 -; 
THUMB8-NEXT: ands r1, r2 -; THUMB8-NEXT: orrs r0, r1 -; THUMB8-NEXT: clz r0, r0 -; THUMB8-NEXT: lsrs r0, r0, #5 -; THUMB8-NEXT: pop {r7, pc} +; THUMB78-LABEL: scalar_i64_signbit_eq: +; THUMB78: @ %bb.0: +; THUMB78-NEXT: rsb.w r3, r2, #32 +; THUMB78-NEXT: lsls r1, r2 +; THUMB78-NEXT: subs r2, #32 +; THUMB78-NEXT: lsr.w r3, r0, r3 +; THUMB78-NEXT: orrs r1, r3 +; THUMB78-NEXT: cmp r2, #0 +; THUMB78-NEXT: it ge +; THUMB78-NEXT: lslge.w r1, r0, r2 +; THUMB78-NEXT: mvns r0, r1 +; THUMB78-NEXT: lsrs r0, r0, #31 +; THUMB78-NEXT: bx lr %t0 = lshr i64 9223372036854775808, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -467,55 +373,55 @@ define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { ; ARM6-LABEL: scalar_i64_lowestbit_eq: ; ARM6: @ %bb.0: +; ARM6-NEXT: sub r1, r2, #32 +; ARM6-NEXT: lsl r0, r0, r2 +; ARM6-NEXT: cmp r1, #0 ; ARM6-NEXT: mov r1, #1 -; ARM6-NEXT: lsr r1, r1, r2 -; ARM6-NEXT: sub r2, r2, #32 -; ARM6-NEXT: cmp r2, #0 -; ARM6-NEXT: movge r1, #0 -; ARM6-NEXT: and r0, r1, r0 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r0, r0, #5 +; ARM6-NEXT: movge r0, #0 +; ARM6-NEXT: bic r0, r1, r0 ; ARM6-NEXT: bx lr ; ; ARM78-LABEL: scalar_i64_lowestbit_eq: ; ARM78: @ %bb.0: +; ARM78-NEXT: sub r1, r2, #32 +; ARM78-NEXT: lsl r0, r0, r2 +; ARM78-NEXT: cmp r1, #0 ; ARM78-NEXT: mov r1, #1 -; ARM78-NEXT: lsr r1, r1, r2 -; ARM78-NEXT: sub r2, r2, #32 -; ARM78-NEXT: cmp r2, #0 -; ARM78-NEXT: movwge r1, #0 -; ARM78-NEXT: and r0, r1, r0 -; ARM78-NEXT: clz r0, r0 -; ARM78-NEXT: lsr r0, r0, #5 +; ARM78-NEXT: movwge r0, #0 +; ARM78-NEXT: bic r0, r1, r0 ; ARM78-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i64_lowestbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: mov r4, r1 -; THUMB6-NEXT: mov r5, r0 -; THUMB6-NEXT: movs r0, #1 -; THUMB6-NEXT: movs r1, #0 -; THUMB6-NEXT: bl __lshrdi3 -; THUMB6-NEXT: ands r1, r4 -; THUMB6-NEXT: ands r0, r5 -; THUMB6-NEXT: orrs r0, r1 -; THUMB6-NEXT: rsbs r1, r0, #0 +; THUMB6-NEXT: push {r7, lr} +; THUMB6-NEXT: bl 
__ashldi3 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r7, pc} ; -; THUMB78-LABEL: scalar_i64_lowestbit_eq: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: movs r1, #1 -; THUMB78-NEXT: lsrs r1, r2 -; THUMB78-NEXT: subs r2, #32 -; THUMB78-NEXT: cmp r2, #0 -; THUMB78-NEXT: it ge -; THUMB78-NEXT: movge r1, #0 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i64_lowestbit_eq: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: sub.w r1, r2, #32 +; THUMB7-NEXT: lsls r0, r2 +; THUMB7-NEXT: cmp r1, #0 +; THUMB7-NEXT: it ge +; THUMB7-NEXT: movge r0, #0 +; THUMB7-NEXT: movs r1, #1 +; THUMB7-NEXT: bic.w r0, r1, r0 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i64_lowestbit_eq: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: lsls r0, r2 +; THUMB8-NEXT: sub.w r1, r2, #32 +; THUMB8-NEXT: cmp r1, #0 +; THUMB8-NEXT: it ge +; THUMB8-NEXT: movge r0, #0 +; THUMB8-NEXT: movs r1, #1 +; THUMB8-NEXT: bic.w r0, r1, r0 +; THUMB8-NEXT: bx lr %t0 = lshr i64 1, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -525,118 +431,67 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; ARM6-LABEL: scalar_i64_bitsinmiddle_eq: ; ARM6: @ %bb.0: -; ARM6-NEXT: push {r11, lr} -; ARM6-NEXT: mov r12, #255 -; ARM6-NEXT: sub lr, r2, #32 -; ARM6-NEXT: orr r12, r12, #65280 -; ARM6-NEXT: cmp lr, #0 -; ARM6-NEXT: lsr r3, r12, r2 -; ARM6-NEXT: movge r3, #0 -; ARM6-NEXT: and r1, r3, r1 -; ARM6-NEXT: mov r3, #16711680 -; ARM6-NEXT: cmp lr, #0 -; ARM6-NEXT: orr r3, r3, #-16777216 -; ARM6-NEXT: lsr r3, r3, r2 -; ARM6-NEXT: rsb r2, r2, #32 -; ARM6-NEXT: orr r2, r3, r12, lsl r2 -; ARM6-NEXT: lsrge r2, r12, lr -; ARM6-NEXT: and r0, r2, r0 -; ARM6-NEXT: orr r0, r0, r1 +; ARM6-NEXT: rsb r3, r2, #32 +; ARM6-NEXT: lsr r3, r0, r3 +; ARM6-NEXT: orr r1, r3, r1, lsl r2 +; ARM6-NEXT: sub r3, r2, #32 +; ARM6-NEXT: cmp r3, #0 +; 
ARM6-NEXT: lslge r1, r0, r3 +; ARM6-NEXT: lsl r0, r0, r2 +; ARM6-NEXT: movge r0, #0 +; ARM6-NEXT: pkhbt r0, r1, r0 ; ARM6-NEXT: clz r0, r0 ; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: pop {r11, pc} +; ARM6-NEXT: bx lr ; ; ARM78-LABEL: scalar_i64_bitsinmiddle_eq: ; ARM78: @ %bb.0: -; ARM78-NEXT: push {r11, lr} -; ARM78-NEXT: movw r12, #65535 -; ARM78-NEXT: sub lr, r2, #32 -; ARM78-NEXT: lsr r3, r12, r2 -; ARM78-NEXT: cmp lr, #0 -; ARM78-NEXT: movwge r3, #0 -; ARM78-NEXT: and r1, r3, r1 -; ARM78-NEXT: movw r3, #0 -; ARM78-NEXT: cmp lr, #0 -; ARM78-NEXT: movt r3, #65535 -; ARM78-NEXT: lsr r3, r3, r2 -; ARM78-NEXT: rsb r2, r2, #32 -; ARM78-NEXT: orr r2, r3, r12, lsl r2 -; ARM78-NEXT: lsrge r2, r12, lr -; ARM78-NEXT: and r0, r2, r0 -; ARM78-NEXT: orr r0, r0, r1 +; ARM78-NEXT: rsb r3, r2, #32 +; ARM78-NEXT: lsr r3, r0, r3 +; ARM78-NEXT: orr r1, r3, r1, lsl r2 +; ARM78-NEXT: sub r3, r2, #32 +; ARM78-NEXT: cmp r3, #0 +; ARM78-NEXT: lslge r1, r0, r3 +; ARM78-NEXT: lsl r0, r0, r2 +; ARM78-NEXT: movwge r0, #0 +; ARM78-NEXT: pkhbt r0, r1, r0 ; ARM78-NEXT: clz r0, r0 ; ARM78-NEXT: lsr r0, r0, #5 -; ARM78-NEXT: pop {r11, pc} +; ARM78-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i64_bitsinmiddle_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: mov r4, r1 -; THUMB6-NEXT: mov r5, r0 -; THUMB6-NEXT: ldr r0, .LCPI11_0 -; THUMB6-NEXT: ldr r1, .LCPI11_1 -; THUMB6-NEXT: bl __lshrdi3 -; THUMB6-NEXT: ands r1, r4 -; THUMB6-NEXT: ands r0, r5 -; THUMB6-NEXT: orrs r0, r1 -; THUMB6-NEXT: rsbs r1, r0, #0 +; THUMB6-NEXT: push {r7, lr} +; THUMB6-NEXT: bl __ashldi3 +; THUMB6-NEXT: ldr r2, .LCPI11_0 +; THUMB6-NEXT: ands r2, r0 +; THUMB6-NEXT: uxth r0, r1 +; THUMB6-NEXT: adds r1, r2, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r7, pc} ; THUMB6-NEXT: .p2align 2 ; THUMB6-NEXT: @ %bb.1: ; THUMB6-NEXT: .LCPI11_0: ; THUMB6-NEXT: .long 4294901760 @ 0xffff0000 -; THUMB6-NEXT: .LCPI11_1: -; THUMB6-NEXT: .long 65535 
@ 0xffff ; -; THUMB7-LABEL: scalar_i64_bitsinmiddle_eq: -; THUMB7: @ %bb.0: -; THUMB7-NEXT: push {r7, lr} -; THUMB7-NEXT: movs r3, #0 -; THUMB7-NEXT: movw lr, #65535 -; THUMB7-NEXT: movt r3, #65535 -; THUMB7-NEXT: lsr.w r12, r3, r2 -; THUMB7-NEXT: rsb.w r3, r2, #32 -; THUMB7-NEXT: lsl.w r3, lr, r3 -; THUMB7-NEXT: orr.w r3, r3, r12 -; THUMB7-NEXT: sub.w r12, r2, #32 -; THUMB7-NEXT: lsr.w r2, lr, r2 -; THUMB7-NEXT: cmp.w r12, #0 -; THUMB7-NEXT: it ge -; THUMB7-NEXT: lsrge.w r3, lr, r12 -; THUMB7-NEXT: it ge -; THUMB7-NEXT: movge r2, #0 -; THUMB7-NEXT: ands r0, r3 -; THUMB7-NEXT: ands r1, r2 -; THUMB7-NEXT: orrs r0, r1 -; THUMB7-NEXT: clz r0, r0 -; THUMB7-NEXT: lsrs r0, r0, #5 -; THUMB7-NEXT: pop {r7, pc} -; -; THUMB8-LABEL: scalar_i64_bitsinmiddle_eq: -; THUMB8: @ %bb.0: -; THUMB8-NEXT: .save {r7, lr} -; THUMB8-NEXT: push {r7, lr} -; THUMB8-NEXT: movs r3, #0 -; THUMB8-NEXT: movw lr, #65535 -; THUMB8-NEXT: movt r3, #65535 -; THUMB8-NEXT: lsr.w r12, r3, r2 -; THUMB8-NEXT: rsb.w r3, r2, #32 -; THUMB8-NEXT: lsl.w r3, lr, r3 -; THUMB8-NEXT: orr.w r3, r3, r12 -; THUMB8-NEXT: sub.w r12, r2, #32 -; THUMB8-NEXT: cmp.w r12, #0 -; THUMB8-NEXT: lsr.w r2, lr, r2 -; THUMB8-NEXT: it ge -; THUMB8-NEXT: lsrge.w r3, lr, r12 -; THUMB8-NEXT: it ge -; THUMB8-NEXT: movge r2, #0 -; THUMB8-NEXT: ands r0, r3 -; THUMB8-NEXT: ands r1, r2 -; THUMB8-NEXT: orrs r0, r1 -; THUMB8-NEXT: clz r0, r0 -; THUMB8-NEXT: lsrs r0, r0, #5 -; THUMB8-NEXT: pop {r7, pc} +; THUMB78-LABEL: scalar_i64_bitsinmiddle_eq: +; THUMB78: @ %bb.0: +; THUMB78-NEXT: rsb.w r3, r2, #32 +; THUMB78-NEXT: lsls r1, r2 +; THUMB78-NEXT: lsr.w r3, r0, r3 +; THUMB78-NEXT: orrs r1, r3 +; THUMB78-NEXT: sub.w r3, r2, #32 +; THUMB78-NEXT: cmp r3, #0 +; THUMB78-NEXT: it ge +; THUMB78-NEXT: lslge.w r1, r0, r3 +; THUMB78-NEXT: lsl.w r0, r0, r2 +; THUMB78-NEXT: it ge +; THUMB78-NEXT: movge r0, #0 +; THUMB78-NEXT: pkhbt r0, r1, r0 +; THUMB78-NEXT: clz r0, r0 +; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: bx lr %t0 = lshr i64 281474976645120, 
%y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -653,33 +508,24 @@ ; ARM6-NEXT: push {r11, lr} ; ARM6-NEXT: ldr r12, [sp, #8] ; ARM6-NEXT: mov lr, #1 -; ARM6-NEXT: and r0, r0, lr, lsr r12 +; ARM6-NEXT: bic r0, lr, r0, lsl r12 ; ARM6-NEXT: ldr r12, [sp, #12] -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r1, r1, lr, lsr r12 +; ARM6-NEXT: bic r1, lr, r1, lsl r12 ; ARM6-NEXT: ldr r12, [sp, #16] -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: and r2, r2, lr, lsr r12 +; ARM6-NEXT: bic r2, lr, r2, lsl r12 ; ARM6-NEXT: ldr r12, [sp, #20] -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: and r3, r3, lr, lsr r12 -; ARM6-NEXT: lsr r2, r2, #5 -; ARM6-NEXT: clz r3, r3 -; ARM6-NEXT: lsr r3, r3, #5 +; ARM6-NEXT: bic r3, lr, r3, lsl r12 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_splat_eq: ; ARM78: @ %bb.0: +; ARM78-NEXT: vmov d17, r2, r3 ; ARM78-NEXT: mov r12, sp -; ARM78-NEXT: vld1.64 {d16, d17}, [r12] -; ARM78-NEXT: vmov.i32 q9, #0x1 -; ARM78-NEXT: vneg.s32 q8, q8 -; ARM78-NEXT: vshl.u32 q8, q9, q8 -; ARM78-NEXT: vmov d19, r2, r3 -; ARM78-NEXT: vmov d18, r0, r1 -; ARM78-NEXT: vtst.32 q8, q8, q9 +; ARM78-NEXT: vld1.64 {d18, d19}, [r12] +; ARM78-NEXT: vmov d16, r0, r1 +; ARM78-NEXT: vmov.i32 q10, #0x1 +; ARM78-NEXT: vshl.u32 q8, q8, q9 +; ARM78-NEXT: vtst.32 q8, q8, q10 ; ARM78-NEXT: vmvn q8, q8 ; ARM78-NEXT: vmovn.i32 d16, q8 ; ARM78-NEXT: vmov r0, r1, d16 @@ -687,43 +533,39 @@ ; ; THUMB6-LABEL: vec_4xi32_splat_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r6, lr} -; THUMB6-NEXT: ldr r5, [sp, #16] +; THUMB6-NEXT: push {r4, r5, r7, lr} +; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: lsls r0, r4 ; THUMB6-NEXT: movs r4, #1 -; THUMB6-NEXT: mov r6, r4 -; THUMB6-NEXT: lsrs r6, r5 -; THUMB6-NEXT: ands r6, r0 -; THUMB6-NEXT: rsbs r0, r6, #0 -; THUMB6-NEXT: adcs r0, r6 +; THUMB6-NEXT: ands r0, r4 +; THUMB6-NEXT: rsbs r5, r0, #0 +; THUMB6-NEXT: adcs r0, r5 ; THUMB6-NEXT: ldr r5, [sp, #20] -; THUMB6-NEXT: mov r6, r4 -; 
THUMB6-NEXT: lsrs r6, r5 -; THUMB6-NEXT: ands r6, r1 -; THUMB6-NEXT: rsbs r1, r6, #0 -; THUMB6-NEXT: adcs r1, r6 +; THUMB6-NEXT: lsls r1, r5 +; THUMB6-NEXT: ands r1, r4 +; THUMB6-NEXT: rsbs r5, r1, #0 +; THUMB6-NEXT: adcs r1, r5 ; THUMB6-NEXT: ldr r5, [sp, #24] -; THUMB6-NEXT: mov r6, r4 -; THUMB6-NEXT: lsrs r6, r5 -; THUMB6-NEXT: ands r6, r2 -; THUMB6-NEXT: rsbs r2, r6, #0 -; THUMB6-NEXT: adcs r2, r6 +; THUMB6-NEXT: lsls r2, r5 +; THUMB6-NEXT: ands r2, r4 +; THUMB6-NEXT: rsbs r5, r2, #0 +; THUMB6-NEXT: adcs r2, r5 ; THUMB6-NEXT: ldr r5, [sp, #28] -; THUMB6-NEXT: lsrs r4, r5 -; THUMB6-NEXT: ands r4, r3 -; THUMB6-NEXT: rsbs r3, r4, #0 +; THUMB6-NEXT: lsls r3, r5 +; THUMB6-NEXT: ands r3, r4 +; THUMB6-NEXT: rsbs r4, r3, #0 ; THUMB6-NEXT: adcs r3, r4 -; THUMB6-NEXT: pop {r4, r5, r6, pc} +; THUMB6-NEXT: pop {r4, r5, r7, pc} ; ; THUMB78-LABEL: vec_4xi32_splat_eq: ; THUMB78: @ %bb.0: +; THUMB78-NEXT: vmov d17, r2, r3 ; THUMB78-NEXT: mov r12, sp -; THUMB78-NEXT: vld1.64 {d16, d17}, [r12] -; THUMB78-NEXT: vmov.i32 q9, #0x1 -; THUMB78-NEXT: vneg.s32 q8, q8 -; THUMB78-NEXT: vshl.u32 q8, q9, q8 -; THUMB78-NEXT: vmov d19, r2, r3 -; THUMB78-NEXT: vmov d18, r0, r1 -; THUMB78-NEXT: vtst.32 q8, q8, q9 +; THUMB78-NEXT: vld1.64 {d18, d19}, [r12] +; THUMB78-NEXT: vmov d16, r0, r1 +; THUMB78-NEXT: vmov.i32 q10, #0x1 +; THUMB78-NEXT: vshl.u32 q8, q8, q9 +; THUMB78-NEXT: vtst.32 q8, q8, q10 ; THUMB78-NEXT: vmvn q8, q8 ; THUMB78-NEXT: vmovn.i32 d16, q8 ; THUMB78-NEXT: vmov r0, r1, d16 @@ -739,20 +581,16 @@ ; ARM6: @ %bb.0: ; ARM6-NEXT: ldr r12, [sp, #4] ; ARM6-NEXT: mov r0, #1 -; ARM6-NEXT: and r0, r1, r0, lsr r12 +; ARM6-NEXT: bic r1, r0, r1, lsl r12 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r1, r0, #5 ; ARM6-NEXT: mov r0, #65280 ; ARM6-NEXT: orr r0, r0, #16711680 -; ARM6-NEXT: and r0, r2, r0, lsr r12 -; ARM6-NEXT: ldr r12, [sp, #12] +; ARM6-NEXT: and r0, r0, r2, lsl r12 ; ARM6-NEXT: clz r0, r0 ; ARM6-NEXT: lsr r2, r0, #5 -; ARM6-NEXT: mov r0, #-2147483648 
-; ARM6-NEXT: and r0, r3, r0, lsr r12 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r3, r0, #5 +; ARM6-NEXT: ldr r0, [sp, #12] +; ARM6-NEXT: mvn r0, r3, lsl r0 +; ARM6-NEXT: lsr r3, r0, #31 ; ARM6-NEXT: mov r0, #1 ; ARM6-NEXT: bx lr ; @@ -781,27 +619,26 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #20] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r0, [sp, #12] +; THUMB6-NEXT: lsls r1, r0 ; THUMB6-NEXT: movs r0, #1 -; THUMB6-NEXT: mov r5, r0 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #24] -; THUMB6-NEXT: ldr r5, .LCPI13_0 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r2 -; THUMB6-NEXT: rsbs r2, r5, #0 -; THUMB6-NEXT: adcs r2, r5 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 +; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: lsls r2, r4 +; THUMB6-NEXT: ldr r4, .LCPI13_0 +; THUMB6-NEXT: ands r4, r2 +; THUMB6-NEXT: rsbs r2, r4, #0 +; THUMB6-NEXT: adcs r2, r4 +; THUMB6-NEXT: ldr r4, [sp, #20] +; THUMB6-NEXT: lsls r3, r4 ; THUMB6-NEXT: lsls r4, r0, #31 -; THUMB6-NEXT: ldr r5, [sp, #28] -; THUMB6-NEXT: lsrs r4, r5 ; THUMB6-NEXT: ands r4, r3 ; THUMB6-NEXT: rsbs r3, r4, #0 ; THUMB6-NEXT: adcs r3, r4 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r4, pc} ; THUMB6-NEXT: .p2align 2 ; THUMB6-NEXT: @ %bb.1: ; THUMB6-NEXT: .LCPI13_0: @@ -842,29 +679,22 @@ ; ARM6-NEXT: ldr r2, [sp, #12] ; ARM6-NEXT: mov lr, #1 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: and r1, r1, lr, lsr r2 +; ARM6-NEXT: bic r1, lr, r1, lsl r2 ; ARM6-NEXT: ldr r2, [sp, #20] -; ARM6-NEXT: and r0, r0, lr, lsr r12 -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r2, r3, lr, lsr r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: lsr r3, r2, #5 +; ARM6-NEXT: bic r0, lr, r0, lsl r12 +; ARM6-NEXT: bic r3, 
lr, r3, lsl r2 ; ARM6-NEXT: mov r2, #1 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_nonsplat_undef0_eq: ; ARM78: @ %bb.0: +; ARM78-NEXT: vmov d17, r2, r3 ; ARM78-NEXT: mov r12, sp -; ARM78-NEXT: vld1.64 {d16, d17}, [r12] -; ARM78-NEXT: vmov.i32 q9, #0x1 -; ARM78-NEXT: vneg.s32 q8, q8 -; ARM78-NEXT: vshl.u32 q8, q9, q8 -; ARM78-NEXT: vmov d19, r2, r3 -; ARM78-NEXT: vmov d18, r0, r1 -; ARM78-NEXT: vtst.32 q8, q8, q9 +; ARM78-NEXT: vld1.64 {d18, d19}, [r12] +; ARM78-NEXT: vmov d16, r0, r1 +; ARM78-NEXT: vmov.i32 q10, #0x1 +; ARM78-NEXT: vshl.u32 q8, q8, q9 +; ARM78-NEXT: vtst.32 q8, q8, q10 ; ARM78-NEXT: vmvn q8, q8 ; ARM78-NEXT: vmovn.i32 d16, q8 ; ARM78-NEXT: vmov r0, r1, d16 @@ -872,38 +702,34 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_undef0_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r2, [sp, #8] +; THUMB6-NEXT: lsls r0, r2 ; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r0 -; THUMB6-NEXT: rsbs r0, r5, #0 -; THUMB6-NEXT: adcs r0, r5 +; THUMB6-NEXT: ands r0, r2 +; THUMB6-NEXT: rsbs r4, r0, #0 +; THUMB6-NEXT: adcs r0, r4 +; THUMB6-NEXT: ldr r4, [sp, #12] +; THUMB6-NEXT: lsls r1, r4 +; THUMB6-NEXT: ands r1, r2 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 ; THUMB6-NEXT: ldr r4, [sp, #20] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #28] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r3 -; THUMB6-NEXT: rsbs r3, r5, #0 -; THUMB6-NEXT: adcs r3, r5 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: lsls r3, r4 +; THUMB6-NEXT: ands r3, r2 +; THUMB6-NEXT: rsbs r4, r3, #0 +; THUMB6-NEXT: adcs r3, r4 +; THUMB6-NEXT: pop {r4, pc} ; ; THUMB78-LABEL: vec_4xi32_nonsplat_undef0_eq: ; THUMB78: @ %bb.0: +; THUMB78-NEXT: vmov d17, r2, r3 ; 
THUMB78-NEXT: mov r12, sp -; THUMB78-NEXT: vld1.64 {d16, d17}, [r12] -; THUMB78-NEXT: vmov.i32 q9, #0x1 -; THUMB78-NEXT: vneg.s32 q8, q8 -; THUMB78-NEXT: vshl.u32 q8, q9, q8 -; THUMB78-NEXT: vmov d19, r2, r3 -; THUMB78-NEXT: vmov d18, r0, r1 -; THUMB78-NEXT: vtst.32 q8, q8, q9 +; THUMB78-NEXT: vld1.64 {d18, d19}, [r12] +; THUMB78-NEXT: vmov d16, r0, r1 +; THUMB78-NEXT: vmov.i32 q10, #0x1 +; THUMB78-NEXT: vshl.u32 q8, q8, q9 +; THUMB78-NEXT: vtst.32 q8, q8, q10 ; THUMB78-NEXT: vmvn q8, q8 ; THUMB78-NEXT: vmovn.i32 d16, q8 ; THUMB78-NEXT: vmov r0, r1, d16 @@ -920,16 +746,10 @@ ; ARM6-NEXT: ldr r2, [sp, #12] ; ARM6-NEXT: mov lr, #1 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: and r1, r1, lr, lsr r2 +; ARM6-NEXT: bic r1, lr, r1, lsl r2 ; ARM6-NEXT: ldr r2, [sp, #20] -; ARM6-NEXT: and r0, r0, lr, lsr r12 -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r2, r3, lr, lsr r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: lsr r3, r2, #5 +; ARM6-NEXT: bic r0, lr, r0, lsl r12 +; ARM6-NEXT: bic r3, lr, r3, lsl r2 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_nonsplat_undef1_eq: @@ -949,26 +769,24 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_undef1_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r2, [sp, #8] +; THUMB6-NEXT: lsls r0, r2 ; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r0 -; THUMB6-NEXT: rsbs r0, r5, #0 -; THUMB6-NEXT: adcs r0, r5 +; THUMB6-NEXT: ands r0, r2 +; THUMB6-NEXT: rsbs r4, r0, #0 +; THUMB6-NEXT: adcs r0, r4 +; THUMB6-NEXT: ldr r4, [sp, #12] +; THUMB6-NEXT: lsls r1, r4 +; THUMB6-NEXT: ands r1, r2 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 ; THUMB6-NEXT: ldr r4, [sp, #20] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; 
THUMB6-NEXT: ldr r4, [sp, #28] -; THUMB6-NEXT: lsrs r2, r4 -; THUMB6-NEXT: ands r2, r3 -; THUMB6-NEXT: rsbs r3, r2, #0 +; THUMB6-NEXT: lsls r3, r4 +; THUMB6-NEXT: ands r3, r2 +; THUMB6-NEXT: rsbs r2, r3, #0 ; THUMB6-NEXT: adcs r3, r2 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r4, pc} ; ; THUMB78-LABEL: vec_4xi32_nonsplat_undef1_eq: ; THUMB78: @ %bb.0: @@ -996,16 +814,10 @@ ; ARM6-NEXT: ldr r2, [sp, #12] ; ARM6-NEXT: mov lr, #1 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: and r1, r1, lr, lsr r2 +; ARM6-NEXT: bic r1, lr, r1, lsl r2 ; ARM6-NEXT: ldr r2, [sp, #20] -; ARM6-NEXT: and r0, r0, lr, lsr r12 -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r2, r3, lr, lsr r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: lsr r3, r2, #5 +; ARM6-NEXT: bic r0, lr, r0, lsl r12 +; ARM6-NEXT: bic r3, lr, r3, lsl r2 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_nonsplat_undef2_eq: @@ -1025,26 +837,24 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_undef2_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r2, [sp, #8] +; THUMB6-NEXT: lsls r0, r2 ; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r0 -; THUMB6-NEXT: rsbs r0, r5, #0 -; THUMB6-NEXT: adcs r0, r5 +; THUMB6-NEXT: ands r0, r2 +; THUMB6-NEXT: rsbs r4, r0, #0 +; THUMB6-NEXT: adcs r0, r4 +; THUMB6-NEXT: ldr r4, [sp, #12] +; THUMB6-NEXT: lsls r1, r4 +; THUMB6-NEXT: ands r1, r2 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 ; THUMB6-NEXT: ldr r4, [sp, #20] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsrs r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #28] -; THUMB6-NEXT: lsrs r2, r4 -; THUMB6-NEXT: ands r2, r3 -; THUMB6-NEXT: rsbs r3, r2, #0 +; THUMB6-NEXT: lsls r3, r4 +; THUMB6-NEXT: ands r3, r2 +; THUMB6-NEXT: 
rsbs r2, r3, #0 ; THUMB6-NEXT: adcs r3, r2 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r4, pc} ; ; THUMB78-LABEL: vec_4xi32_nonsplat_undef2_eq: ; THUMB78: @ %bb.0: @@ -1071,48 +881,21 @@ ;------------------------------------------------------------------------------; define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { -; ARM6-LABEL: scalar_i8_signbit_ne: -; ARM6: @ %bb.0: -; ARM6-NEXT: uxtb r1, r1 -; ARM6-NEXT: mov r2, #128 -; ARM6-NEXT: and r0, r0, r2, lsr r1 -; ARM6-NEXT: uxtb r0, r0 -; ARM6-NEXT: cmp r0, #0 -; ARM6-NEXT: movne r0, #1 -; ARM6-NEXT: bx lr +; ARM-LABEL: scalar_i8_signbit_ne: +; ARM: @ %bb.0: +; ARM-NEXT: uxtb r1, r1 +; ARM-NEXT: lsl r0, r0, r1 +; ARM-NEXT: uxtb r0, r0 +; ARM-NEXT: lsr r0, r0, #7 +; ARM-NEXT: bx lr ; -; ARM78-LABEL: scalar_i8_signbit_ne: -; ARM78: @ %bb.0: -; ARM78-NEXT: uxtb r1, r1 -; ARM78-NEXT: mov r2, #128 -; ARM78-NEXT: and r0, r0, r2, lsr r1 -; ARM78-NEXT: uxtb r0, r0 -; ARM78-NEXT: cmp r0, #0 -; ARM78-NEXT: movwne r0, #1 -; ARM78-NEXT: bx lr -; -; THUMB6-LABEL: scalar_i8_signbit_ne: -; THUMB6: @ %bb.0: -; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #128 -; THUMB6-NEXT: lsrs r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r0, r2 -; THUMB6-NEXT: subs r1, r0, #1 -; THUMB6-NEXT: sbcs r0, r1 -; THUMB6-NEXT: bx lr -; -; THUMB78-LABEL: scalar_i8_signbit_ne: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #128 -; THUMB78-NEXT: lsr.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: cmp r0, #0 -; THUMB78-NEXT: it ne -; THUMB78-NEXT: movne r0, #1 -; THUMB78-NEXT: bx lr +; THUMB-LABEL: scalar_i8_signbit_ne: +; THUMB: @ %bb.0: +; THUMB-NEXT: uxtb r1, r1 +; THUMB-NEXT: lsls r0, r1 +; THUMB-NEXT: uxtb r0, r0 +; THUMB-NEXT: lsrs r0, r0, #7 +; THUMB-NEXT: bx lr %t0 = lshr i8 128, %y %t1 = and i8 %t0, %x %res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate Index: test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll 
=================================================================== --- test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ test/CodeGen/ARM/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -21,35 +21,43 @@ ; ARM-LABEL: scalar_i8_signbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 -; ARM-NEXT: mvn r2, #127 -; ARM-NEXT: and r0, r0, r2, lsl r1 ; ARM-NEXT: uxtb r0, r0 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: lsr r0, r0, r1 +; ARM-NEXT: mov r1, #1 +; ARM-NEXT: uxtb r0, r0 +; ARM-NEXT: eor r0, r1, r0, lsr #7 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_signbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #127 -; THUMB6-NEXT: mvns r2, r2 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r1, r2 +; THUMB6-NEXT: uxtb r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #128 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; -; THUMB78-LABEL: scalar_i8_signbit_eq: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: mvn r2, #127 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i8_signbit_eq: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxtb r1, r1 +; THUMB7-NEXT: uxtb r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: movs r1, #1 +; THUMB7-NEXT: uxtb r0, r0 +; THUMB7-NEXT: eor.w r0, r1, r0, lsr #7 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i8_signbit_eq: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxtb r0, r0 +; THUMB8-NEXT: uxtb r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: movs r1, #1 +; THUMB8-NEXT: uxtb r0, r0 +; THUMB8-NEXT: eor.w r0, r1, r0, lsr #7 +; THUMB8-NEXT: bx lr %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 0 @@ -60,34 +68,39 @@ ; ARM-LABEL: scalar_i8_lowestbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 -; ARM-NEXT: mov r2, #1 
-; ARM-NEXT: and r0, r0, r2, lsl r1 ; ARM-NEXT: uxtb r0, r0 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: mov r2, #1 +; ARM-NEXT: bic r0, r2, r0, lsr r1 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_lowestbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r1, r2 +; THUMB6-NEXT: uxtb r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; -; THUMB78-LABEL: scalar_i8_lowestbit_eq: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #1 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i8_lowestbit_eq: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxtb r1, r1 +; THUMB7-NEXT: uxtb r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: movs r1, #1 +; THUMB7-NEXT: bic.w r0, r1, r0 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i8_lowestbit_eq: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxtb r0, r0 +; THUMB8-NEXT: uxtb r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: movs r1, #1 +; THUMB8-NEXT: bic.w r0, r1, r0 +; THUMB8-NEXT: bx lr %t0 = shl i8 1, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 0 @@ -98,9 +111,9 @@ ; ARM-LABEL: scalar_i8_bitsinmiddle_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxtb r1, r1 -; ARM-NEXT: mov r2, #24 -; ARM-NEXT: and r0, r0, r2, lsl r1 ; ARM-NEXT: uxtb r0, r0 +; ARM-NEXT: mov r2, #24 +; ARM-NEXT: and r0, r2, r0, lsr r1 ; ARM-NEXT: clz r0, r0 ; ARM-NEXT: lsr r0, r0, #5 ; ARM-NEXT: bx lr @@ -108,24 +121,33 @@ ; THUMB6-LABEL: scalar_i8_bitsinmiddle_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #24 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r1, r2 +; THUMB6-NEXT: uxtb r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; 
THUMB6-NEXT: movs r1, #24 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; -; THUMB78-LABEL: scalar_i8_bitsinmiddle_eq: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: movs r2, #24 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i8_bitsinmiddle_eq: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxtb r1, r1 +; THUMB7-NEXT: uxtb r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: and r0, r0, #24 +; THUMB7-NEXT: clz r0, r0 +; THUMB7-NEXT: lsrs r0, r0, #5 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i8_bitsinmiddle_eq: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxtb r0, r0 +; THUMB8-NEXT: uxtb r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: and r0, r0, #24 +; THUMB8-NEXT: clz r0, r0 +; THUMB8-NEXT: lsrs r0, r0, #5 +; THUMB8-NEXT: bx lr %t0 = shl i8 24, %y %t1 = and i8 %t0, %x %res = icmp eq i8 %t1, 0 @@ -135,57 +157,47 @@ ; i16 scalar define i1 @scalar_i16_signbit_eq(i16 %x, i16 %y) nounwind { -; ARM6-LABEL: scalar_i16_signbit_eq: -; ARM6: @ %bb.0: -; ARM6-NEXT: ldr r2, .LCPI3_0 -; ARM6-NEXT: uxth r1, r1 -; ARM6-NEXT: and r0, r0, r2, lsl r1 -; ARM6-NEXT: uxth r0, r0 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: bx lr -; ARM6-NEXT: .p2align 2 -; ARM6-NEXT: @ %bb.1: -; ARM6-NEXT: .LCPI3_0: -; ARM6-NEXT: .long 4294934528 @ 0xffff8000 -; -; ARM78-LABEL: scalar_i16_signbit_eq: -; ARM78: @ %bb.0: -; ARM78-NEXT: movw r2, #32768 -; ARM78-NEXT: uxth r1, r1 -; ARM78-NEXT: movt r2, #65535 -; ARM78-NEXT: and r0, r0, r2, lsl r1 -; ARM78-NEXT: uxth r0, r0 -; ARM78-NEXT: clz r0, r0 -; ARM78-NEXT: lsr r0, r0, #5 -; ARM78-NEXT: bx lr +; ARM-LABEL: scalar_i16_signbit_eq: +; ARM: @ %bb.0: +; ARM-NEXT: uxth r1, r1 +; ARM-NEXT: uxth r0, r0 +; ARM-NEXT: lsr r0, r0, r1 +; ARM-NEXT: mov r1, #1 +; ARM-NEXT: uxth r0, r0 +; ARM-NEXT: eor r0, r1, r0, lsr #15 
+; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i16_signbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxth r1, r1 -; THUMB6-NEXT: ldr r2, .LCPI3_0 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxth r1, r2 +; THUMB6-NEXT: uxth r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: lsls r1, r1, #15 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr -; THUMB6-NEXT: .p2align 2 -; THUMB6-NEXT: @ %bb.1: -; THUMB6-NEXT: .LCPI3_0: -; THUMB6-NEXT: .long 4294934528 @ 0xffff8000 ; -; THUMB78-LABEL: scalar_i16_signbit_eq: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: movw r2, #32768 -; THUMB78-NEXT: uxth r1, r1 -; THUMB78-NEXT: movt r2, #65535 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxth r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i16_signbit_eq: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxth r1, r1 +; THUMB7-NEXT: uxth r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: movs r1, #1 +; THUMB7-NEXT: uxth r0, r0 +; THUMB7-NEXT: eor.w r0, r1, r0, lsr #15 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i16_signbit_eq: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxth r0, r0 +; THUMB8-NEXT: uxth r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: movs r1, #1 +; THUMB8-NEXT: uxth r0, r0 +; THUMB8-NEXT: eor.w r0, r1, r0, lsr #15 +; THUMB8-NEXT: bx lr %t0 = shl i16 32768, %y %t1 = and i16 %t0, %x %res = icmp eq i16 %t1, 0 @@ -196,34 +208,39 @@ ; ARM-LABEL: scalar_i16_lowestbit_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxth r1, r1 -; ARM-NEXT: mov r2, #1 -; ARM-NEXT: and r0, r0, r2, lsl r1 ; ARM-NEXT: uxth r0, r0 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: mov r2, #1 +; ARM-NEXT: bic r0, r2, r0, lsr r1 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i16_lowestbit_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxth r1, r1 -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; 
THUMB6-NEXT: uxth r1, r2 +; THUMB6-NEXT: uxth r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; -; THUMB78-LABEL: scalar_i16_lowestbit_eq: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxth r1, r1 -; THUMB78-NEXT: movs r2, #1 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxth r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i16_lowestbit_eq: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxth r1, r1 +; THUMB7-NEXT: uxth r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: movs r1, #1 +; THUMB7-NEXT: bic.w r0, r1, r0 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i16_lowestbit_eq: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxth r0, r0 +; THUMB8-NEXT: uxth r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: movs r1, #1 +; THUMB8-NEXT: bic.w r0, r1, r0 +; THUMB8-NEXT: bx lr %t0 = shl i16 1, %y %t1 = and i16 %t0, %x %res = icmp eq i16 %t1, 0 @@ -234,9 +251,9 @@ ; ARM-LABEL: scalar_i16_bitsinmiddle_eq: ; ARM: @ %bb.0: ; ARM-NEXT: uxth r1, r1 -; ARM-NEXT: mov r2, #4080 -; ARM-NEXT: and r0, r0, r2, lsl r1 ; ARM-NEXT: uxth r0, r0 +; ARM-NEXT: mov r2, #4080 +; ARM-NEXT: and r0, r2, r0, lsr r1 ; ARM-NEXT: clz r0, r0 ; ARM-NEXT: lsr r0, r0, #5 ; ARM-NEXT: bx lr @@ -244,25 +261,34 @@ ; THUMB6-LABEL: scalar_i16_bitsinmiddle_eq: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxth r1, r1 -; THUMB6-NEXT: movs r2, #255 -; THUMB6-NEXT: lsls r2, r2, #4 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxth r1, r2 +; THUMB6-NEXT: uxth r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #255 +; THUMB6-NEXT: lsls r1, r1, #4 +; THUMB6-NEXT: ands r1, r0 ; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; -; THUMB78-LABEL: scalar_i16_bitsinmiddle_eq: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxth r1, r1 -; THUMB78-NEXT: mov.w r2, #4080 -; THUMB78-NEXT: 
lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxth r0, r0 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i16_bitsinmiddle_eq: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxth r1, r1 +; THUMB7-NEXT: uxth r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: and r0, r0, #4080 +; THUMB7-NEXT: clz r0, r0 +; THUMB7-NEXT: lsrs r0, r0, #5 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i16_bitsinmiddle_eq: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxth r0, r0 +; THUMB8-NEXT: uxth r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: and r0, r0, #4080 +; THUMB8-NEXT: clz r0, r0 +; THUMB8-NEXT: lsrs r0, r0, #5 +; THUMB8-NEXT: bx lr %t0 = shl i16 4080, %y %t1 = and i16 %t0, %x %res = icmp eq i16 %t1, 0 @@ -274,29 +300,25 @@ define i1 @scalar_i32_signbit_eq(i32 %x, i32 %y) nounwind { ; ARM-LABEL: scalar_i32_signbit_eq: ; ARM: @ %bb.0: -; ARM-NEXT: mov r2, #-2147483648 -; ARM-NEXT: and r0, r0, r2, lsl r1 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: mvn r0, r0, lsr r1 +; ARM-NEXT: lsr r0, r0, #31 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i32_signbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsls r2, r2, #31 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: rsbs r0, r2, #0 -; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: lsls r1, r1, #31 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 +; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; ; THUMB78-LABEL: scalar_i32_signbit_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: mov.w r2, #-2147483648 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: lsrs r0, r1 +; THUMB78-NEXT: mvns r0, r0 +; THUMB78-NEXT: lsrs r0, r0, #31 ; THUMB78-NEXT: bx lr %t0 = shl i32 2147483648, %y %t1 = and i32 %t0, %x @@ -308,27 +330,23 @@ ; ARM-LABEL: scalar_i32_lowestbit_eq: ; ARM: @ %bb.0: ; 
ARM-NEXT: mov r2, #1 -; ARM-NEXT: and r0, r0, r2, lsl r1 -; ARM-NEXT: clz r0, r0 -; ARM-NEXT: lsr r0, r0, #5 +; ARM-NEXT: bic r0, r2, r0, lsr r1 ; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i32_lowestbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: rsbs r0, r2, #0 -; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 +; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; ; THUMB78-LABEL: scalar_i32_lowestbit_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: movs r2, #1 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: lsrs r0, r1 +; THUMB78-NEXT: movs r1, #1 +; THUMB78-NEXT: bic.w r0, r1, r0 ; THUMB78-NEXT: bx lr %t0 = shl i32 1, %y %t1 = and i32 %t0, %x @@ -341,7 +359,7 @@ ; ARM6: @ %bb.0: ; ARM6-NEXT: mov r2, #65280 ; ARM6-NEXT: orr r2, r2, #16711680 -; ARM6-NEXT: and r0, r0, r2, lsl r1 +; ARM6-NEXT: and r0, r2, r0, lsr r1 ; ARM6-NEXT: clz r0, r0 ; ARM6-NEXT: lsr r0, r0, #5 ; ARM6-NEXT: bx lr @@ -350,18 +368,18 @@ ; ARM78: @ %bb.0: ; ARM78-NEXT: movw r2, #65280 ; ARM78-NEXT: movt r2, #255 -; ARM78-NEXT: and r0, r0, r2, lsl r1 +; ARM78-NEXT: and r0, r2, r0, lsr r1 ; ARM78-NEXT: clz r0, r0 ; ARM78-NEXT: lsr r0, r0, #5 ; ARM78-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i32_bitsinmiddle_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: ldr r2, .LCPI8_0 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: rsbs r0, r2, #0 -; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: ldr r1, .LCPI8_0 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 +; THUMB6-NEXT: adcs r0, r1 ; THUMB6-NEXT: bx lr ; THUMB6-NEXT: .p2align 2 ; THUMB6-NEXT: @ %bb.1: @@ -370,9 +388,9 @@ ; ; THUMB78-LABEL: scalar_i32_bitsinmiddle_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: movw r2, #65280 -; THUMB78-NEXT: movt r2, #255 -; 
THUMB78-NEXT: lsl.w r1, r2, r1 +; THUMB78-NEXT: lsrs r0, r1 +; THUMB78-NEXT: movw r1, #65280 +; THUMB78-NEXT: movt r1, #255 ; THUMB78-NEXT: ands r0, r1 ; THUMB78-NEXT: clz r0, r0 ; THUMB78-NEXT: lsrs r0, r0, #5 @@ -388,55 +406,44 @@ define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { ; ARM6-LABEL: scalar_i64_signbit_eq: ; ARM6: @ %bb.0: -; ARM6-NEXT: mov r0, #-2147483648 -; ARM6-NEXT: lsl r0, r0, r2 -; ARM6-NEXT: sub r2, r2, #32 -; ARM6-NEXT: cmp r2, #0 +; ARM6-NEXT: lsr r0, r1, r2 +; ARM6-NEXT: sub r1, r2, #32 +; ARM6-NEXT: cmp r1, #0 ; ARM6-NEXT: movge r0, #0 -; ARM6-NEXT: and r0, r0, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r0, r0, #5 +; ARM6-NEXT: mvn r0, r0 +; ARM6-NEXT: lsr r0, r0, #31 ; ARM6-NEXT: bx lr ; ; ARM78-LABEL: scalar_i64_signbit_eq: ; ARM78: @ %bb.0: -; ARM78-NEXT: mov r0, #-2147483648 -; ARM78-NEXT: lsl r0, r0, r2 -; ARM78-NEXT: sub r2, r2, #32 -; ARM78-NEXT: cmp r2, #0 +; ARM78-NEXT: lsr r0, r1, r2 +; ARM78-NEXT: sub r1, r2, #32 +; ARM78-NEXT: cmp r1, #0 ; ARM78-NEXT: movwge r0, #0 -; ARM78-NEXT: and r0, r0, r1 -; ARM78-NEXT: clz r0, r0 -; ARM78-NEXT: lsr r0, r0, #5 +; ARM78-NEXT: mvn r0, r0 +; ARM78-NEXT: lsr r0, r0, #31 ; ARM78-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i64_signbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: mov r4, r1 -; THUMB6-NEXT: mov r5, r0 +; THUMB6-NEXT: push {r7, lr} +; THUMB6-NEXT: bl __lshrdi3 ; THUMB6-NEXT: movs r0, #1 -; THUMB6-NEXT: lsls r1, r0, #31 -; THUMB6-NEXT: movs r0, #0 -; THUMB6-NEXT: bl __ashldi3 -; THUMB6-NEXT: ands r1, r4 -; THUMB6-NEXT: ands r0, r5 -; THUMB6-NEXT: orrs r0, r1 -; THUMB6-NEXT: rsbs r1, r0, #0 -; THUMB6-NEXT: adcs r0, r1 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: lsls r2, r0, #31 +; THUMB6-NEXT: ands r2, r1 +; THUMB6-NEXT: rsbs r0, r2, #0 +; THUMB6-NEXT: adcs r0, r2 +; THUMB6-NEXT: pop {r7, pc} ; ; THUMB78-LABEL: scalar_i64_signbit_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: mov.w r0, #-2147483648 -; THUMB78-NEXT: lsls r0, r2 -; THUMB78-NEXT: 
subs r2, #32 -; THUMB78-NEXT: cmp r2, #0 +; THUMB78-NEXT: lsr.w r0, r1, r2 +; THUMB78-NEXT: sub.w r1, r2, #32 +; THUMB78-NEXT: cmp r1, #0 ; THUMB78-NEXT: it ge ; THUMB78-NEXT: movge r0, #0 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: clz r0, r0 -; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: mvns r0, r0 +; THUMB78-NEXT: lsrs r0, r0, #31 ; THUMB78-NEXT: bx lr %t0 = shl i64 9223372036854775808, %y %t1 = and i64 %t0, %x @@ -445,97 +452,41 @@ } define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { -; ARM6-LABEL: scalar_i64_lowestbit_eq: -; ARM6: @ %bb.0: -; ARM6-NEXT: push {r11, lr} -; ARM6-NEXT: mov r12, #1 -; ARM6-NEXT: sub lr, r2, #32 -; ARM6-NEXT: lsl r3, r12, r2 -; ARM6-NEXT: rsb r2, r2, #32 -; ARM6-NEXT: cmp lr, #0 -; ARM6-NEXT: lsr r2, r12, r2 -; ARM6-NEXT: movge r3, #0 -; ARM6-NEXT: lslge r2, r12, lr -; ARM6-NEXT: and r0, r3, r0 -; ARM6-NEXT: and r1, r2, r1 -; ARM6-NEXT: orr r0, r0, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: pop {r11, pc} -; -; ARM78-LABEL: scalar_i64_lowestbit_eq: -; ARM78: @ %bb.0: -; ARM78-NEXT: push {r11, lr} -; ARM78-NEXT: mov r12, #1 -; ARM78-NEXT: sub lr, r2, #32 -; ARM78-NEXT: lsl r3, r12, r2 -; ARM78-NEXT: rsb r2, r2, #32 -; ARM78-NEXT: cmp lr, #0 -; ARM78-NEXT: lsr r2, r12, r2 -; ARM78-NEXT: movwge r3, #0 -; ARM78-NEXT: lslge r2, r12, lr -; ARM78-NEXT: and r0, r3, r0 -; ARM78-NEXT: and r1, r2, r1 -; ARM78-NEXT: orr r0, r0, r1 -; ARM78-NEXT: clz r0, r0 -; ARM78-NEXT: lsr r0, r0, #5 -; ARM78-NEXT: pop {r11, pc} +; ARM-LABEL: scalar_i64_lowestbit_eq: +; ARM: @ %bb.0: +; ARM-NEXT: rsb r3, r2, #32 +; ARM-NEXT: lsr r0, r0, r2 +; ARM-NEXT: sub r2, r2, #32 +; ARM-NEXT: orr r0, r0, r1, lsl r3 +; ARM-NEXT: cmp r2, #0 +; ARM-NEXT: lsrge r0, r1, r2 +; ARM-NEXT: mov r1, #1 +; ARM-NEXT: bic r0, r1, r0 +; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i64_lowestbit_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: mov r4, r1 -; THUMB6-NEXT: mov r5, r0 -; THUMB6-NEXT: movs r0, #1 -; 
THUMB6-NEXT: movs r1, #0 -; THUMB6-NEXT: bl __ashldi3 -; THUMB6-NEXT: ands r1, r4 -; THUMB6-NEXT: ands r0, r5 -; THUMB6-NEXT: orrs r0, r1 -; THUMB6-NEXT: rsbs r1, r0, #0 +; THUMB6-NEXT: push {r7, lr} +; THUMB6-NEXT: bl __lshrdi3 +; THUMB6-NEXT: movs r1, #1 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r7, pc} ; -; THUMB7-LABEL: scalar_i64_lowestbit_eq: -; THUMB7: @ %bb.0: -; THUMB7-NEXT: push {r7, lr} -; THUMB7-NEXT: rsb.w r3, r2, #32 -; THUMB7-NEXT: mov.w r12, #1 -; THUMB7-NEXT: sub.w lr, r2, #32 -; THUMB7-NEXT: lsl.w r2, r12, r2 -; THUMB7-NEXT: lsr.w r3, r12, r3 -; THUMB7-NEXT: cmp.w lr, #0 -; THUMB7-NEXT: it ge -; THUMB7-NEXT: lslge.w r3, r12, lr -; THUMB7-NEXT: it ge -; THUMB7-NEXT: movge r2, #0 -; THUMB7-NEXT: ands r1, r3 -; THUMB7-NEXT: ands r0, r2 -; THUMB7-NEXT: orrs r0, r1 -; THUMB7-NEXT: clz r0, r0 -; THUMB7-NEXT: lsrs r0, r0, #5 -; THUMB7-NEXT: pop {r7, pc} -; -; THUMB8-LABEL: scalar_i64_lowestbit_eq: -; THUMB8: @ %bb.0: -; THUMB8-NEXT: .save {r7, lr} -; THUMB8-NEXT: push {r7, lr} -; THUMB8-NEXT: rsb.w r3, r2, #32 -; THUMB8-NEXT: sub.w lr, r2, #32 -; THUMB8-NEXT: mov.w r12, #1 -; THUMB8-NEXT: cmp.w lr, #0 -; THUMB8-NEXT: lsr.w r3, r12, r3 -; THUMB8-NEXT: lsl.w r2, r12, r2 -; THUMB8-NEXT: it ge -; THUMB8-NEXT: lslge.w r3, r12, lr -; THUMB8-NEXT: it ge -; THUMB8-NEXT: movge r2, #0 -; THUMB8-NEXT: ands r1, r3 -; THUMB8-NEXT: ands r0, r2 -; THUMB8-NEXT: orrs r0, r1 -; THUMB8-NEXT: clz r0, r0 -; THUMB8-NEXT: lsrs r0, r0, #5 -; THUMB8-NEXT: pop {r7, pc} +; THUMB78-LABEL: scalar_i64_lowestbit_eq: +; THUMB78: @ %bb.0: +; THUMB78-NEXT: rsb.w r3, r2, #32 +; THUMB78-NEXT: lsrs r0, r2 +; THUMB78-NEXT: subs r2, #32 +; THUMB78-NEXT: lsl.w r3, r1, r3 +; THUMB78-NEXT: orrs r0, r3 +; THUMB78-NEXT: cmp r2, #0 +; THUMB78-NEXT: it ge +; THUMB78-NEXT: lsrge.w r0, r1, r2 +; THUMB78-NEXT: movs r1, #1 +; THUMB78-NEXT: bic.w r0, r1, r0 +; THUMB78-NEXT: bx lr %t0 = shl i64 1, %y %t1 
= and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -545,118 +496,67 @@ define i1 @scalar_i64_bitsinmiddle_eq(i64 %x, i64 %y) nounwind { ; ARM6-LABEL: scalar_i64_bitsinmiddle_eq: ; ARM6: @ %bb.0: -; ARM6-NEXT: push {r4, lr} -; ARM6-NEXT: mov r12, #16711680 -; ARM6-NEXT: sub lr, r2, #32 -; ARM6-NEXT: orr r12, r12, #-16777216 -; ARM6-NEXT: cmp lr, #0 -; ARM6-NEXT: mov r4, #255 -; ARM6-NEXT: lsl r3, r12, r2 -; ARM6-NEXT: orr r4, r4, #65280 -; ARM6-NEXT: movge r3, #0 -; ARM6-NEXT: and r0, r3, r0 ; ARM6-NEXT: rsb r3, r2, #32 -; ARM6-NEXT: cmp lr, #0 -; ARM6-NEXT: lsr r3, r12, r3 -; ARM6-NEXT: orr r2, r3, r4, lsl r2 -; ARM6-NEXT: lslge r2, r12, lr -; ARM6-NEXT: and r1, r2, r1 -; ARM6-NEXT: orr r0, r0, r1 +; ARM6-NEXT: lsr r0, r0, r2 +; ARM6-NEXT: orr r0, r0, r1, lsl r3 +; ARM6-NEXT: sub r3, r2, #32 +; ARM6-NEXT: cmp r3, #0 +; ARM6-NEXT: lsrge r0, r1, r3 +; ARM6-NEXT: lsr r1, r1, r2 +; ARM6-NEXT: movge r1, #0 +; ARM6-NEXT: pkhbt r0, r1, r0 ; ARM6-NEXT: clz r0, r0 ; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: pop {r4, pc} +; ARM6-NEXT: bx lr ; ; ARM78-LABEL: scalar_i64_bitsinmiddle_eq: ; ARM78: @ %bb.0: -; ARM78-NEXT: push {r4, lr} -; ARM78-NEXT: movw r12, #0 -; ARM78-NEXT: sub lr, r2, #32 -; ARM78-NEXT: movt r12, #65535 -; ARM78-NEXT: cmp lr, #0 -; ARM78-NEXT: lsl r3, r12, r2 -; ARM78-NEXT: movw r4, #65535 -; ARM78-NEXT: movwge r3, #0 -; ARM78-NEXT: and r0, r3, r0 ; ARM78-NEXT: rsb r3, r2, #32 -; ARM78-NEXT: cmp lr, #0 -; ARM78-NEXT: lsr r3, r12, r3 -; ARM78-NEXT: orr r2, r3, r4, lsl r2 -; ARM78-NEXT: lslge r2, r12, lr -; ARM78-NEXT: and r1, r2, r1 -; ARM78-NEXT: orr r0, r0, r1 +; ARM78-NEXT: lsr r0, r0, r2 +; ARM78-NEXT: orr r0, r0, r1, lsl r3 +; ARM78-NEXT: sub r3, r2, #32 +; ARM78-NEXT: cmp r3, #0 +; ARM78-NEXT: lsrge r0, r1, r3 +; ARM78-NEXT: lsr r1, r1, r2 +; ARM78-NEXT: movwge r1, #0 +; ARM78-NEXT: pkhbt r0, r1, r0 ; ARM78-NEXT: clz r0, r0 ; ARM78-NEXT: lsr r0, r0, #5 -; ARM78-NEXT: pop {r4, pc} +; ARM78-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i64_bitsinmiddle_eq: ; THUMB6: @ 
%bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: mov r4, r1 -; THUMB6-NEXT: mov r5, r0 -; THUMB6-NEXT: ldr r0, .LCPI11_0 -; THUMB6-NEXT: ldr r1, .LCPI11_1 -; THUMB6-NEXT: bl __ashldi3 -; THUMB6-NEXT: ands r1, r4 -; THUMB6-NEXT: ands r0, r5 -; THUMB6-NEXT: orrs r0, r1 -; THUMB6-NEXT: rsbs r1, r0, #0 +; THUMB6-NEXT: push {r7, lr} +; THUMB6-NEXT: bl __lshrdi3 +; THUMB6-NEXT: ldr r2, .LCPI11_0 +; THUMB6-NEXT: ands r2, r0 +; THUMB6-NEXT: uxth r0, r1 +; THUMB6-NEXT: adds r1, r2, r0 +; THUMB6-NEXT: rsbs r0, r1, #0 ; THUMB6-NEXT: adcs r0, r1 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r7, pc} ; THUMB6-NEXT: .p2align 2 ; THUMB6-NEXT: @ %bb.1: ; THUMB6-NEXT: .LCPI11_0: ; THUMB6-NEXT: .long 4294901760 @ 0xffff0000 -; THUMB6-NEXT: .LCPI11_1: -; THUMB6-NEXT: .long 65535 @ 0xffff ; -; THUMB7-LABEL: scalar_i64_bitsinmiddle_eq: -; THUMB7: @ %bb.0: -; THUMB7-NEXT: push {r7, lr} -; THUMB7-NEXT: movw r3, #65535 -; THUMB7-NEXT: movw lr, #0 -; THUMB7-NEXT: lsl.w r12, r3, r2 -; THUMB7-NEXT: rsb.w r3, r2, #32 -; THUMB7-NEXT: movt lr, #65535 -; THUMB7-NEXT: lsr.w r3, lr, r3 -; THUMB7-NEXT: orr.w r3, r3, r12 -; THUMB7-NEXT: sub.w r12, r2, #32 -; THUMB7-NEXT: lsl.w r2, lr, r2 -; THUMB7-NEXT: cmp.w r12, #0 -; THUMB7-NEXT: it ge -; THUMB7-NEXT: lslge.w r3, lr, r12 -; THUMB7-NEXT: it ge -; THUMB7-NEXT: movge r2, #0 -; THUMB7-NEXT: ands r1, r3 -; THUMB7-NEXT: ands r0, r2 -; THUMB7-NEXT: orrs r0, r1 -; THUMB7-NEXT: clz r0, r0 -; THUMB7-NEXT: lsrs r0, r0, #5 -; THUMB7-NEXT: pop {r7, pc} -; -; THUMB8-LABEL: scalar_i64_bitsinmiddle_eq: -; THUMB8: @ %bb.0: -; THUMB8-NEXT: .save {r7, lr} -; THUMB8-NEXT: push {r7, lr} -; THUMB8-NEXT: movw r3, #65535 -; THUMB8-NEXT: movw lr, #0 -; THUMB8-NEXT: lsl.w r12, r3, r2 -; THUMB8-NEXT: rsb.w r3, r2, #32 -; THUMB8-NEXT: movt lr, #65535 -; THUMB8-NEXT: lsr.w r3, lr, r3 -; THUMB8-NEXT: orr.w r3, r3, r12 -; THUMB8-NEXT: sub.w r12, r2, #32 -; THUMB8-NEXT: cmp.w r12, #0 -; THUMB8-NEXT: lsl.w r2, lr, r2 -; THUMB8-NEXT: it ge -; THUMB8-NEXT: 
lslge.w r3, lr, r12 -; THUMB8-NEXT: it ge -; THUMB8-NEXT: movge r2, #0 -; THUMB8-NEXT: ands r1, r3 -; THUMB8-NEXT: ands r0, r2 -; THUMB8-NEXT: orrs r0, r1 -; THUMB8-NEXT: clz r0, r0 -; THUMB8-NEXT: lsrs r0, r0, #5 -; THUMB8-NEXT: pop {r7, pc} +; THUMB78-LABEL: scalar_i64_bitsinmiddle_eq: +; THUMB78: @ %bb.0: +; THUMB78-NEXT: rsb.w r3, r2, #32 +; THUMB78-NEXT: lsrs r0, r2 +; THUMB78-NEXT: lsl.w r3, r1, r3 +; THUMB78-NEXT: orrs r0, r3 +; THUMB78-NEXT: sub.w r3, r2, #32 +; THUMB78-NEXT: cmp r3, #0 +; THUMB78-NEXT: it ge +; THUMB78-NEXT: lsrge.w r0, r1, r3 +; THUMB78-NEXT: lsr.w r1, r1, r2 +; THUMB78-NEXT: it ge +; THUMB78-NEXT: movge r1, #0 +; THUMB78-NEXT: pkhbt r0, r1, r0 +; THUMB78-NEXT: clz r0, r0 +; THUMB78-NEXT: lsrs r0, r0, #5 +; THUMB78-NEXT: bx lr %t0 = shl i64 281474976645120, %y %t1 = and i64 %t0, %x %res = icmp eq i64 %t1, 0 @@ -673,32 +573,25 @@ ; ARM6-NEXT: push {r11, lr} ; ARM6-NEXT: ldr r12, [sp, #8] ; ARM6-NEXT: mov lr, #1 -; ARM6-NEXT: and r0, r0, lr, lsl r12 +; ARM6-NEXT: bic r0, lr, r0, lsr r12 ; ARM6-NEXT: ldr r12, [sp, #12] -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r1, r1, lr, lsl r12 +; ARM6-NEXT: bic r1, lr, r1, lsr r12 ; ARM6-NEXT: ldr r12, [sp, #16] -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: and r2, r2, lr, lsl r12 +; ARM6-NEXT: bic r2, lr, r2, lsr r12 ; ARM6-NEXT: ldr r12, [sp, #20] -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: and r3, r3, lr, lsl r12 -; ARM6-NEXT: lsr r2, r2, #5 -; ARM6-NEXT: clz r3, r3 -; ARM6-NEXT: lsr r3, r3, #5 +; ARM6-NEXT: bic r3, lr, r3, lsr r12 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_splat_eq: ; ARM78: @ %bb.0: -; ARM78-NEXT: vmov.i32 q8, #0x1 ; ARM78-NEXT: mov r12, sp -; ARM78-NEXT: vld1.64 {d18, d19}, [r12] -; ARM78-NEXT: vshl.u32 q8, q8, q9 +; ARM78-NEXT: vld1.64 {d16, d17}, [r12] ; ARM78-NEXT: vmov d19, r2, r3 +; ARM78-NEXT: vneg.s32 q8, q8 ; ARM78-NEXT: vmov d18, r0, r1 -; ARM78-NEXT: vtst.32 q8, q8, q9 +; ARM78-NEXT: vmov.i32 q10, #0x1 +; 
ARM78-NEXT: vshl.u32 q8, q9, q8 +; ARM78-NEXT: vtst.32 q8, q8, q10 ; ARM78-NEXT: vmvn q8, q8 ; ARM78-NEXT: vmovn.i32 d16, q8 ; ARM78-NEXT: vmov r0, r1, d16 @@ -706,42 +599,40 @@ ; ; THUMB6-LABEL: vec_4xi32_splat_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r6, lr} -; THUMB6-NEXT: ldr r5, [sp, #16] +; THUMB6-NEXT: push {r4, r5, r7, lr} +; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: lsrs r0, r4 ; THUMB6-NEXT: movs r4, #1 -; THUMB6-NEXT: mov r6, r4 -; THUMB6-NEXT: lsls r6, r5 -; THUMB6-NEXT: ands r6, r0 -; THUMB6-NEXT: rsbs r0, r6, #0 -; THUMB6-NEXT: adcs r0, r6 +; THUMB6-NEXT: ands r0, r4 +; THUMB6-NEXT: rsbs r5, r0, #0 +; THUMB6-NEXT: adcs r0, r5 ; THUMB6-NEXT: ldr r5, [sp, #20] -; THUMB6-NEXT: mov r6, r4 -; THUMB6-NEXT: lsls r6, r5 -; THUMB6-NEXT: ands r6, r1 -; THUMB6-NEXT: rsbs r1, r6, #0 -; THUMB6-NEXT: adcs r1, r6 +; THUMB6-NEXT: lsrs r1, r5 +; THUMB6-NEXT: ands r1, r4 +; THUMB6-NEXT: rsbs r5, r1, #0 +; THUMB6-NEXT: adcs r1, r5 ; THUMB6-NEXT: ldr r5, [sp, #24] -; THUMB6-NEXT: mov r6, r4 -; THUMB6-NEXT: lsls r6, r5 -; THUMB6-NEXT: ands r6, r2 -; THUMB6-NEXT: rsbs r2, r6, #0 -; THUMB6-NEXT: adcs r2, r6 +; THUMB6-NEXT: lsrs r2, r5 +; THUMB6-NEXT: ands r2, r4 +; THUMB6-NEXT: rsbs r5, r2, #0 +; THUMB6-NEXT: adcs r2, r5 ; THUMB6-NEXT: ldr r5, [sp, #28] -; THUMB6-NEXT: lsls r4, r5 -; THUMB6-NEXT: ands r4, r3 -; THUMB6-NEXT: rsbs r3, r4, #0 +; THUMB6-NEXT: lsrs r3, r5 +; THUMB6-NEXT: ands r3, r4 +; THUMB6-NEXT: rsbs r4, r3, #0 ; THUMB6-NEXT: adcs r3, r4 -; THUMB6-NEXT: pop {r4, r5, r6, pc} +; THUMB6-NEXT: pop {r4, r5, r7, pc} ; ; THUMB78-LABEL: vec_4xi32_splat_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: vmov.i32 q8, #0x1 ; THUMB78-NEXT: mov r12, sp -; THUMB78-NEXT: vld1.64 {d18, d19}, [r12] -; THUMB78-NEXT: vshl.u32 q8, q8, q9 +; THUMB78-NEXT: vld1.64 {d16, d17}, [r12] ; THUMB78-NEXT: vmov d19, r2, r3 +; THUMB78-NEXT: vneg.s32 q8, q8 ; THUMB78-NEXT: vmov d18, r0, r1 -; THUMB78-NEXT: vtst.32 q8, q8, q9 +; THUMB78-NEXT: vmov.i32 q10, #0x1 +; THUMB78-NEXT: 
vshl.u32 q8, q9, q8 +; THUMB78-NEXT: vtst.32 q8, q8, q10 ; THUMB78-NEXT: vmvn q8, q8 ; THUMB78-NEXT: vmovn.i32 d16, q8 ; THUMB78-NEXT: vmov r0, r1, d16 @@ -757,20 +648,16 @@ ; ARM6: @ %bb.0: ; ARM6-NEXT: ldr r12, [sp, #4] ; ARM6-NEXT: mov r0, #1 -; ARM6-NEXT: and r0, r1, r0, lsl r12 +; ARM6-NEXT: bic r1, r0, r1, lsr r12 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r1, r0, #5 ; ARM6-NEXT: mov r0, #65280 ; ARM6-NEXT: orr r0, r0, #16711680 -; ARM6-NEXT: and r0, r2, r0, lsl r12 -; ARM6-NEXT: ldr r12, [sp, #12] +; ARM6-NEXT: and r0, r0, r2, lsr r12 ; ARM6-NEXT: clz r0, r0 ; ARM6-NEXT: lsr r2, r0, #5 -; ARM6-NEXT: mov r0, #-2147483648 -; ARM6-NEXT: and r0, r3, r0, lsl r12 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: lsr r3, r0, #5 +; ARM6-NEXT: ldr r0, [sp, #12] +; ARM6-NEXT: mvn r0, r3, lsr r0 +; ARM6-NEXT: lsr r3, r0, #31 ; ARM6-NEXT: mov r0, #1 ; ARM6-NEXT: bx lr ; @@ -798,27 +685,26 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #20] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r0, [sp, #12] +; THUMB6-NEXT: lsrs r1, r0 ; THUMB6-NEXT: movs r0, #1 -; THUMB6-NEXT: mov r5, r0 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #24] -; THUMB6-NEXT: ldr r5, .LCPI13_0 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r2 -; THUMB6-NEXT: rsbs r2, r5, #0 -; THUMB6-NEXT: adcs r2, r5 +; THUMB6-NEXT: ands r1, r0 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 +; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: lsrs r2, r4 +; THUMB6-NEXT: ldr r4, .LCPI13_0 +; THUMB6-NEXT: ands r4, r2 +; THUMB6-NEXT: rsbs r2, r4, #0 +; THUMB6-NEXT: adcs r2, r4 +; THUMB6-NEXT: ldr r4, [sp, #20] +; THUMB6-NEXT: lsrs r3, r4 ; THUMB6-NEXT: lsls r4, r0, #31 -; THUMB6-NEXT: ldr r5, [sp, #28] -; THUMB6-NEXT: lsls r4, r5 ; THUMB6-NEXT: ands r4, r3 ; THUMB6-NEXT: rsbs r3, r4, #0 ; THUMB6-NEXT: 
adcs r3, r4 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r4, pc} ; THUMB6-NEXT: .p2align 2 ; THUMB6-NEXT: @ %bb.1: ; THUMB6-NEXT: .LCPI13_0: @@ -858,28 +744,23 @@ ; ARM6-NEXT: ldr r2, [sp, #12] ; ARM6-NEXT: mov lr, #1 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: and r1, r1, lr, lsl r2 +; ARM6-NEXT: bic r1, lr, r1, lsr r2 ; ARM6-NEXT: ldr r2, [sp, #20] -; ARM6-NEXT: and r0, r0, lr, lsl r12 -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r2, r3, lr, lsl r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: lsr r3, r2, #5 +; ARM6-NEXT: bic r0, lr, r0, lsr r12 +; ARM6-NEXT: bic r3, lr, r3, lsr r2 ; ARM6-NEXT: mov r2, #1 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_nonsplat_undef0_eq: ; ARM78: @ %bb.0: -; ARM78-NEXT: vmov.i32 q8, #0x1 ; ARM78-NEXT: mov r12, sp -; ARM78-NEXT: vld1.64 {d18, d19}, [r12] -; ARM78-NEXT: vshl.u32 q8, q8, q9 +; ARM78-NEXT: vld1.64 {d16, d17}, [r12] ; ARM78-NEXT: vmov d19, r2, r3 +; ARM78-NEXT: vneg.s32 q8, q8 ; ARM78-NEXT: vmov d18, r0, r1 -; ARM78-NEXT: vtst.32 q8, q8, q9 +; ARM78-NEXT: vmov.i32 q10, #0x1 +; ARM78-NEXT: vshl.u32 q8, q9, q8 +; ARM78-NEXT: vtst.32 q8, q8, q10 ; ARM78-NEXT: vmvn q8, q8 ; ARM78-NEXT: vmovn.i32 d16, q8 ; ARM78-NEXT: vmov r0, r1, d16 @@ -887,37 +768,35 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_undef0_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r2, [sp, #8] +; THUMB6-NEXT: lsrs r0, r2 ; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r0 -; THUMB6-NEXT: rsbs r0, r5, #0 -; THUMB6-NEXT: adcs r0, r5 +; THUMB6-NEXT: ands r0, r2 +; THUMB6-NEXT: rsbs r4, r0, #0 +; THUMB6-NEXT: adcs r0, r4 +; THUMB6-NEXT: ldr r4, [sp, #12] +; THUMB6-NEXT: lsrs r1, r4 +; THUMB6-NEXT: ands r1, r2 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 ; THUMB6-NEXT: ldr r4, [sp, #20] -; THUMB6-NEXT: 
mov r5, r2 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #28] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r3 -; THUMB6-NEXT: rsbs r3, r5, #0 -; THUMB6-NEXT: adcs r3, r5 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: lsrs r3, r4 +; THUMB6-NEXT: ands r3, r2 +; THUMB6-NEXT: rsbs r4, r3, #0 +; THUMB6-NEXT: adcs r3, r4 +; THUMB6-NEXT: pop {r4, pc} ; ; THUMB78-LABEL: vec_4xi32_nonsplat_undef0_eq: ; THUMB78: @ %bb.0: -; THUMB78-NEXT: vmov.i32 q8, #0x1 ; THUMB78-NEXT: mov r12, sp -; THUMB78-NEXT: vld1.64 {d18, d19}, [r12] -; THUMB78-NEXT: vshl.u32 q8, q8, q9 +; THUMB78-NEXT: vld1.64 {d16, d17}, [r12] ; THUMB78-NEXT: vmov d19, r2, r3 +; THUMB78-NEXT: vneg.s32 q8, q8 ; THUMB78-NEXT: vmov d18, r0, r1 -; THUMB78-NEXT: vtst.32 q8, q8, q9 +; THUMB78-NEXT: vmov.i32 q10, #0x1 +; THUMB78-NEXT: vshl.u32 q8, q9, q8 +; THUMB78-NEXT: vtst.32 q8, q8, q10 ; THUMB78-NEXT: vmvn q8, q8 ; THUMB78-NEXT: vmovn.i32 d16, q8 ; THUMB78-NEXT: vmov r0, r1, d16 @@ -934,16 +813,10 @@ ; ARM6-NEXT: ldr r2, [sp, #12] ; ARM6-NEXT: mov lr, #1 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: and r1, r1, lr, lsl r2 +; ARM6-NEXT: bic r1, lr, r1, lsr r2 ; ARM6-NEXT: ldr r2, [sp, #20] -; ARM6-NEXT: and r0, r0, lr, lsl r12 -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r2, r3, lr, lsl r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: lsr r3, r2, #5 +; ARM6-NEXT: bic r0, lr, r0, lsr r12 +; ARM6-NEXT: bic r3, lr, r3, lsr r2 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_nonsplat_undef1_eq: @@ -962,26 +835,24 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_undef1_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r2, [sp, #8] +; THUMB6-NEXT: lsrs r0, r2 ; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: mov r5, r2 -; 
THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r0 -; THUMB6-NEXT: rsbs r0, r5, #0 -; THUMB6-NEXT: adcs r0, r5 +; THUMB6-NEXT: ands r0, r2 +; THUMB6-NEXT: rsbs r4, r0, #0 +; THUMB6-NEXT: adcs r0, r4 +; THUMB6-NEXT: ldr r4, [sp, #12] +; THUMB6-NEXT: lsrs r1, r4 +; THUMB6-NEXT: ands r1, r2 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 ; THUMB6-NEXT: ldr r4, [sp, #20] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #28] -; THUMB6-NEXT: lsls r2, r4 -; THUMB6-NEXT: ands r2, r3 -; THUMB6-NEXT: rsbs r3, r2, #0 +; THUMB6-NEXT: lsrs r3, r4 +; THUMB6-NEXT: ands r3, r2 +; THUMB6-NEXT: rsbs r2, r3, #0 ; THUMB6-NEXT: adcs r3, r2 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r4, pc} ; ; THUMB78-LABEL: vec_4xi32_nonsplat_undef1_eq: ; THUMB78: @ %bb.0: @@ -1008,16 +879,10 @@ ; ARM6-NEXT: ldr r2, [sp, #12] ; ARM6-NEXT: mov lr, #1 ; ARM6-NEXT: ldr r12, [sp, #8] -; ARM6-NEXT: and r1, r1, lr, lsl r2 +; ARM6-NEXT: bic r1, lr, r1, lsr r2 ; ARM6-NEXT: ldr r2, [sp, #20] -; ARM6-NEXT: and r0, r0, lr, lsl r12 -; ARM6-NEXT: clz r1, r1 -; ARM6-NEXT: clz r0, r0 -; ARM6-NEXT: and r2, r3, lr, lsl r2 -; ARM6-NEXT: lsr r1, r1, #5 -; ARM6-NEXT: clz r2, r2 -; ARM6-NEXT: lsr r0, r0, #5 -; ARM6-NEXT: lsr r3, r2, #5 +; ARM6-NEXT: bic r0, lr, r0, lsr r12 +; ARM6-NEXT: bic r3, lr, r3, lsr r2 ; ARM6-NEXT: pop {r11, pc} ; ; ARM78-LABEL: vec_4xi32_nonsplat_undef2_eq: @@ -1036,26 +901,24 @@ ; ; THUMB6-LABEL: vec_4xi32_nonsplat_undef2_eq: ; THUMB6: @ %bb.0: -; THUMB6-NEXT: push {r4, r5, r7, lr} -; THUMB6-NEXT: ldr r4, [sp, #16] +; THUMB6-NEXT: push {r4, lr} +; THUMB6-NEXT: ldr r2, [sp, #8] +; THUMB6-NEXT: lsrs r0, r2 ; THUMB6-NEXT: movs r2, #1 -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r0 -; THUMB6-NEXT: rsbs r0, r5, #0 -; THUMB6-NEXT: adcs r0, r5 +; THUMB6-NEXT: ands r0, r2 +; THUMB6-NEXT: rsbs r4, r0, #0 +; THUMB6-NEXT: 
adcs r0, r4 +; THUMB6-NEXT: ldr r4, [sp, #12] +; THUMB6-NEXT: lsrs r1, r4 +; THUMB6-NEXT: ands r1, r2 +; THUMB6-NEXT: rsbs r4, r1, #0 +; THUMB6-NEXT: adcs r1, r4 ; THUMB6-NEXT: ldr r4, [sp, #20] -; THUMB6-NEXT: mov r5, r2 -; THUMB6-NEXT: lsls r5, r4 -; THUMB6-NEXT: ands r5, r1 -; THUMB6-NEXT: rsbs r1, r5, #0 -; THUMB6-NEXT: adcs r1, r5 -; THUMB6-NEXT: ldr r4, [sp, #28] -; THUMB6-NEXT: lsls r2, r4 -; THUMB6-NEXT: ands r2, r3 -; THUMB6-NEXT: rsbs r3, r2, #0 +; THUMB6-NEXT: lsrs r3, r4 +; THUMB6-NEXT: ands r3, r2 +; THUMB6-NEXT: rsbs r2, r3, #0 ; THUMB6-NEXT: adcs r3, r2 -; THUMB6-NEXT: pop {r4, r5, r7, pc} +; THUMB6-NEXT: pop {r4, pc} ; ; THUMB78-LABEL: vec_4xi32_nonsplat_undef2_eq: ; THUMB78: @ %bb.0: @@ -1081,49 +944,41 @@ ;------------------------------------------------------------------------------; define i1 @scalar_i8_signbit_ne(i8 %x, i8 %y) nounwind { -; ARM6-LABEL: scalar_i8_signbit_ne: -; ARM6: @ %bb.0: -; ARM6-NEXT: uxtb r1, r1 -; ARM6-NEXT: mvn r2, #127 -; ARM6-NEXT: and r0, r0, r2, lsl r1 -; ARM6-NEXT: uxtb r0, r0 -; ARM6-NEXT: cmp r0, #0 -; ARM6-NEXT: movne r0, #1 -; ARM6-NEXT: bx lr -; -; ARM78-LABEL: scalar_i8_signbit_ne: -; ARM78: @ %bb.0: -; ARM78-NEXT: uxtb r1, r1 -; ARM78-NEXT: mvn r2, #127 -; ARM78-NEXT: and r0, r0, r2, lsl r1 -; ARM78-NEXT: uxtb r0, r0 -; ARM78-NEXT: cmp r0, #0 -; ARM78-NEXT: movwne r0, #1 -; ARM78-NEXT: bx lr +; ARM-LABEL: scalar_i8_signbit_ne: +; ARM: @ %bb.0: +; ARM-NEXT: uxtb r1, r1 +; ARM-NEXT: uxtb r0, r0 +; ARM-NEXT: lsr r0, r0, r1 +; ARM-NEXT: uxtb r0, r0 +; ARM-NEXT: lsr r0, r0, #7 +; ARM-NEXT: bx lr ; ; THUMB6-LABEL: scalar_i8_signbit_ne: ; THUMB6: @ %bb.0: ; THUMB6-NEXT: uxtb r1, r1 -; THUMB6-NEXT: movs r2, #127 -; THUMB6-NEXT: mvns r2, r2 -; THUMB6-NEXT: lsls r2, r1 -; THUMB6-NEXT: ands r2, r0 -; THUMB6-NEXT: uxtb r0, r2 -; THUMB6-NEXT: subs r1, r0, #1 -; THUMB6-NEXT: sbcs r0, r1 +; THUMB6-NEXT: uxtb r0, r0 +; THUMB6-NEXT: lsrs r0, r1 +; THUMB6-NEXT: uxtb r0, r0 +; THUMB6-NEXT: lsrs r0, r0, #7 ; THUMB6-NEXT: bx lr ; 
-; THUMB78-LABEL: scalar_i8_signbit_ne: -; THUMB78: @ %bb.0: -; THUMB78-NEXT: uxtb r1, r1 -; THUMB78-NEXT: mvn r2, #127 -; THUMB78-NEXT: lsl.w r1, r2, r1 -; THUMB78-NEXT: ands r0, r1 -; THUMB78-NEXT: uxtb r0, r0 -; THUMB78-NEXT: cmp r0, #0 -; THUMB78-NEXT: it ne -; THUMB78-NEXT: movne r0, #1 -; THUMB78-NEXT: bx lr +; THUMB7-LABEL: scalar_i8_signbit_ne: +; THUMB7: @ %bb.0: +; THUMB7-NEXT: uxtb r1, r1 +; THUMB7-NEXT: uxtb r0, r0 +; THUMB7-NEXT: lsrs r0, r1 +; THUMB7-NEXT: uxtb r0, r0 +; THUMB7-NEXT: lsrs r0, r0, #7 +; THUMB7-NEXT: bx lr +; +; THUMB8-LABEL: scalar_i8_signbit_ne: +; THUMB8: @ %bb.0: +; THUMB8-NEXT: uxtb r0, r0 +; THUMB8-NEXT: uxtb r1, r1 +; THUMB8-NEXT: lsrs r0, r1 +; THUMB8-NEXT: uxtb r0, r0 +; THUMB8-NEXT: lsrs r0, r0, #7 +; THUMB8-NEXT: bx lr %t0 = shl i8 128, %y %t1 = and i8 %t0, %x %res = icmp ne i8 %t1, 0 ; we are perfectly happy with 'ne' predicate Index: test/CodeGen/PowerPC/shift-cmp.ll =================================================================== --- test/CodeGen/PowerPC/shift-cmp.ll +++ test/CodeGen/PowerPC/shift-cmp.ll @@ -6,8 +6,7 @@ define i1 @and_cmp_variable_power_of_two(i32 %x, i32 %y) { ; CHECK-LABEL: and_cmp_variable_power_of_two: ; CHECK: # %bb.0: -; CHECK-NEXT: subfic 4, 4, 32 -; CHECK-NEXT: rlwnm 3, 3, 4, 31, 31 +; CHECK-NEXT: srw 3, 3, 4 ; CHECK-NEXT: blr %shl = shl i32 1, %y %and = and i32 %x, %shl @@ -18,8 +17,7 @@ define i1 @and_cmp_variable_power_of_two_64(i64 %x, i64 %y) { ; CHECK-LABEL: and_cmp_variable_power_of_two_64: ; CHECK: # %bb.0: -; CHECK-NEXT: subfic 4, 4, 64 -; CHECK-NEXT: rldcl 3, 3, 4, 63 +; CHECK-NEXT: srd 3, 3, 4 ; CHECK-NEXT: blr %shl = shl i64 1, %y %and = and i64 %x, %shl @@ -30,9 +28,8 @@ define i1 @and_ncmp_variable_power_of_two(i32 %x, i32 %y) { ; CHECK-LABEL: and_ncmp_variable_power_of_two: ; CHECK: # %bb.0: -; CHECK-NEXT: subfic 4, 4, 32 -; CHECK-NEXT: nor 3, 3, 3 -; CHECK-NEXT: rlwnm 3, 3, 4, 31, 31 +; CHECK-NEXT: srw 3, 3, 4 +; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %shl = shl i32 1, %y 
%and = and i32 %x, %shl @@ -43,9 +40,8 @@ define i1 @and_ncmp_variable_power_of_two_64(i64 %x, i64 %y) { ; CHECK-LABEL: and_ncmp_variable_power_of_two_64: ; CHECK: # %bb.0: -; CHECK-NEXT: not 3, 3 -; CHECK-NEXT: subfic 4, 4, 64 -; CHECK-NEXT: rldcl 3, 3, 4, 63 +; CHECK-NEXT: srd 3, 3, 4 +; CHECK-NEXT: xori 3, 3, 1 ; CHECK-NEXT: blr %shl = shl i64 1, %y %and = and i64 %x, %shl Index: test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll =================================================================== --- test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll +++ test/CodeGen/X86/hoist-and-by-const-from-lshr-in-eqcmp-zero.ll @@ -23,19 +23,18 @@ ; X86-LABEL: scalar_i8_signbit_eq: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movb $-128, %al -; X86-NEXT: shrb %cl, %al -; X86-NEXT: testb %al, {{[0-9]+}}(%esp) +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shlb %cl, %al +; X86-NEXT: testb $-128, %al ; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: scalar_i8_signbit_eq: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movb $-128, %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrb %cl, %al -; X64-NEXT: testb %dil, %al +; X64-NEXT: shlb %cl, %dil +; X64-NEXT: testb $-128, %dil ; X64-NEXT: sete %al ; X64-NEXT: retq %t0 = lshr i8 128, %y @@ -48,19 +47,18 @@ ; X86-LABEL: scalar_i8_lowestbit_eq: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movb $1, %al -; X86-NEXT: shrb %cl, %al -; X86-NEXT: testb %al, {{[0-9]+}}(%esp) +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shlb %cl, %al +; X86-NEXT: testb $1, %al ; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: scalar_i8_lowestbit_eq: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movb $1, %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrb %cl, %al -; X64-NEXT: testb %dil, %al +; X64-NEXT: shlb %cl, %dil +; X64-NEXT: testb $1, %dil ; X64-NEXT: sete %al ; X64-NEXT: retq %t0 = lshr i8 1, %y @@ 
-73,19 +71,18 @@ ; X86-LABEL: scalar_i8_bitsinmiddle_eq: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movb $24, %al -; X86-NEXT: shrb %cl, %al -; X86-NEXT: testb %al, {{[0-9]+}}(%esp) +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shlb %cl, %al +; X86-NEXT: testb $24, %al ; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: scalar_i8_bitsinmiddle_eq: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movb $24, %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrb %cl, %al -; X64-NEXT: testb %dil, %al +; X64-NEXT: shlb %cl, %dil +; X64-NEXT: testb $24, %dil ; X64-NEXT: sete %al ; X64-NEXT: retq %t0 = lshr i8 24, %y @@ -100,36 +97,33 @@ ; X86-BMI1-LABEL: scalar_i16_signbit_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $32768, %eax # imm = 0x8000 -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testl $32768, %eax # imm = 0x8000 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i16_signbit_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $32768, %ecx # imm = 0x8000 -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testl $32768, %eax # imm = 0x8000 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i16_signbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $32768, %eax # imm = 0x8000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shrl %cl, %eax -; X64-BMI1-NEXT: testw %di, %ax +; X64-BMI1-NEXT: shll %cl, %edi +; X64-BMI1-NEXT: testl $32768, %edi # imm = 0x8000 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i16_signbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: 
movl $32768, %eax # imm = 0x8000 -; X64-BMI2-NEXT: shrxl %esi, %eax, %eax -; X64-BMI2-NEXT: testw %di, %ax +; X64-BMI2-NEXT: shlxl %esi, %edi, %eax +; X64-BMI2-NEXT: testl $32768, %eax # imm = 0x8000 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i16 32768, %y @@ -142,36 +136,33 @@ ; X86-BMI1-LABEL: scalar_i16_lowestbit_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $1, %eax -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $1, %al ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i16_lowestbit_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $1, %ecx -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testb $1, %al ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i16_lowestbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $1, %eax ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shrl %cl, %eax -; X64-BMI1-NEXT: testw %di, %ax +; X64-BMI1-NEXT: shll %cl, %edi +; X64-BMI1-NEXT: testb $1, %dil ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i16_lowestbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shrxl %esi, %eax, %eax -; X64-BMI2-NEXT: testw %di, %ax +; X64-BMI2-NEXT: shlxl %esi, %edi, %eax +; X64-BMI2-NEXT: testb $1, %al ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i16 1, %y @@ -184,36 +175,33 @@ ; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0 -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl 
{{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $4080, %ecx # imm = 0xFF0 -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i16_bitsinmiddle_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shrl %cl, %eax -; X64-BMI1-NEXT: testw %di, %ax +; X64-BMI1-NEXT: shll %cl, %edi +; X64-BMI1-NEXT: testl $4080, %edi # imm = 0xFF0 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i16_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $4080, %eax # imm = 0xFF0 -; X64-BMI2-NEXT: shrxl %esi, %eax, %eax -; X64-BMI2-NEXT: testw %di, %ax +; X64-BMI2-NEXT: shlxl %esi, %edi, %eax +; X64-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i16 4080, %y @@ -228,36 +216,33 @@ ; X86-BMI1-LABEL: scalar_i32_signbit_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i32_signbit_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI2-NEXT: 
testl %eax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i32_signbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shrl %cl, %eax -; X64-BMI1-NEXT: testl %edi, %eax +; X64-BMI1-NEXT: shll %cl, %edi +; X64-BMI1-NEXT: testl $-2147483648, %edi # imm = 0x80000000 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i32_signbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X64-BMI2-NEXT: shrxl %esi, %eax, %eax -; X64-BMI2-NEXT: testl %edi, %eax +; X64-BMI2-NEXT: shlxl %esi, %edi, %eax +; X64-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i32 2147483648, %y @@ -270,36 +255,33 @@ ; X86-BMI1-LABEL: scalar_i32_lowestbit_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $1, %eax -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testb $1, %al ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i32_lowestbit_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $1, %ecx -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testb $1, %al ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i32_lowestbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $1, %eax ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shrl %cl, %eax -; 
X64-BMI1-NEXT: testl %edi, %eax +; X64-BMI1-NEXT: shll %cl, %edi +; X64-BMI1-NEXT: testb $1, %dil ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i32_lowestbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shrxl %esi, %eax, %eax -; X64-BMI2-NEXT: testl %edi, %eax +; X64-BMI2-NEXT: shlxl %esi, %edi, %eax +; X64-BMI2-NEXT: testb $1, %al ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i32 1, %y @@ -312,36 +294,33 @@ ; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00 -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $16776960, %ecx # imm = 0xFFFF00 -; X86-BMI2-NEXT: shrxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i32_bitsinmiddle_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shrl %cl, %eax -; X64-BMI1-NEXT: testl %edi, %eax +; X64-BMI1-NEXT: shll %cl, %edi +; X64-BMI1-NEXT: testl $16776960, %edi # imm = 0xFFFF00 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i32_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $16776960, %eax # imm = 0xFFFF00 -; X64-BMI2-NEXT: shrxl %esi, %eax, %eax -; X64-BMI2-NEXT: testl %edi, %eax +; X64-BMI2-NEXT: shlxl %esi, %edi, %eax +; 
X64-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i32 16776960, %y @@ -357,55 +336,44 @@ ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-BMI1-NEXT: xorl %edx, %edx -; X86-BMI1-NEXT: xorl %esi, %esi -; X86-BMI1-NEXT: shrdl %cl, %eax, %esi -; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: shldl %cl, %eax, %edx ; X86-BMI1-NEXT: testb $32, %cl -; X86-BMI1-NEXT: cmovnel %eax, %esi -; X86-BMI1-NEXT: cmovnel %edx, %eax -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI1-NEXT: orl %esi, %eax +; X86-BMI1-NEXT: cmovnel %esi, %edx +; X86-BMI1-NEXT: testl $-2147483648, %edx # imm = 0x80000000 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: popl %esi ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i64_signbit_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI2-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-BMI2-NEXT: xorl %edx, %edx -; X86-BMI2-NEXT: xorl %esi, %esi -; X86-BMI2-NEXT: shrdl %cl, %eax, %esi -; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI2-NEXT: testb $32, %cl -; X86-BMI2-NEXT: cmovnel %eax, %esi -; X86-BMI2-NEXT: cmovnel %edx, %eax -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %eax -; X86-BMI2-NEXT: orl %esi, %eax +; X86-BMI2-NEXT: cmovel %edx, %eax +; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X86-BMI2-NEXT: sete %al -; X86-BMI2-NEXT: popl %esi ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: 
scalar_i64_signbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movq %rsi, %rcx -; X64-BMI1-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1-NEXT: shrq %cl, %rax -; X64-BMI1-NEXT: testq %rdi, %rax +; X64-BMI1-NEXT: shlq %cl, %rdi +; X64-BMI1-NEXT: shrq $63, %rdi ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i64_signbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax -; X64-BMI2-NEXT: testq %rdi, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: shrq $63, %rax ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i64 9223372036854775808, %y @@ -415,34 +383,42 @@ } define i1 @scalar_i64_lowestbit_eq(i64 %x, i64 %y) nounwind { -; X86-LABEL: scalar_i64_lowestbit_eq: -; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: movl $1, %edx -; X86-NEXT: shrdl %cl, %eax, %edx -; X86-NEXT: testb $32, %cl -; X86-NEXT: cmovnel %eax, %edx -; X86-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl $0, %edx -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-BMI1-LABEL: scalar_i64_lowestbit_eq: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shll %cl, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: cmovel %eax, %edx +; X86-BMI1-NEXT: testb $1, %dl +; X86-BMI1-NEXT: sete %al +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: scalar_i64_lowestbit_eq: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: shlxl %eax, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %edx, %edx +; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: cmovel %ecx, %edx +; X86-BMI2-NEXT: testb $1, %dl +; X86-BMI2-NEXT: sete %al +; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i64_lowestbit_eq: ; 
X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movq %rsi, %rcx -; X64-BMI1-NEXT: movl $1, %eax ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1-NEXT: shrq %cl, %rax -; X64-BMI1-NEXT: testq %rdi, %rax +; X64-BMI1-NEXT: shlq %cl, %rdi +; X64-BMI1-NEXT: testb $1, %dil ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i64_lowestbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $1, %eax -; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax -; X64-BMI2-NEXT: testq %rdi, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: testb $1, %al ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i64 1, %y @@ -456,17 +432,18 @@ ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $65535, %eax # imm = 0xFFFF -; X86-BMI1-NEXT: movl $-65536, %edx # imm = 0xFFFF0000 -; X86-BMI1-NEXT: shrdl %cl, %eax, %edx -; X86-BMI1-NEXT: shrl %cl, %eax -; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl %eax, %esi +; X86-BMI1-NEXT: shll %cl, %esi +; X86-BMI1-NEXT: shldl %cl, %eax, %edx +; X86-BMI1-NEXT: xorl %eax, %eax ; X86-BMI1-NEXT: testb $32, %cl -; X86-BMI1-NEXT: cmovnel %eax, %edx -; X86-BMI1-NEXT: cmovel %eax, %esi -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1-NEXT: orl %edx, %esi +; X86-BMI1-NEXT: cmovnel %esi, %edx +; X86-BMI1-NEXT: movzwl %dx, %ecx +; X86-BMI1-NEXT: cmovel %esi, %eax +; X86-BMI1-NEXT: andl $-65536, %eax # imm = 0xFFFF0000 +; X86-BMI1-NEXT: orl %ecx, %eax ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: popl %esi ; X86-BMI1-NEXT: retl @@ -475,17 +452,17 @@ ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI2-NEXT: movl $65535, %eax # imm = 0xFFFF -; X86-BMI2-NEXT: movl $-65536, %edx # imm = 0xFFFF0000 -; X86-BMI2-NEXT: shrdl %cl, %eax, %edx -; X86-BMI2-NEXT: shrxl %ecx, %eax, 
%eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shldl %cl, %eax, %edx +; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax ; X86-BMI2-NEXT: xorl %esi, %esi ; X86-BMI2-NEXT: testb $32, %cl ; X86-BMI2-NEXT: cmovnel %eax, %edx +; X86-BMI2-NEXT: movzwl %dx, %ecx ; X86-BMI2-NEXT: cmovel %eax, %esi -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: orl %edx, %esi +; X86-BMI2-NEXT: andl $-65536, %esi # imm = 0xFFFF0000 +; X86-BMI2-NEXT: orl %ecx, %esi ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: popl %esi ; X86-BMI2-NEXT: retl @@ -493,18 +470,18 @@ ; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movq %rsi, %rcx -; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1-NEXT: shrq %cl, %rax -; X64-BMI1-NEXT: testq %rdi, %rax +; X64-BMI1-NEXT: shlq %cl, %rdi +; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 +; X64-BMI1-NEXT: testq %rax, %rdi ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 -; X64-BMI2-NEXT: shrxq %rsi, %rax, %rax -; X64-BMI2-NEXT: testq %rdi, %rax +; X64-BMI2-NEXT: shlxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000 +; X64-BMI2-NEXT: testq %rcx, %rax ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = lshr i64 281474976645120, %y @@ -518,37 +495,48 @@ ;------------------------------------------------------------------------------; define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { -; SSE2-LABEL: vec_4xi32_splat_eq: -; SSE2: # %bb.0: -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7] -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = [1,1,1,1] -; SSE2-NEXT: movdqa %xmm3, %xmm4 -; SSE2-NEXT: psrld %xmm2, %xmm4 -; 
SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7] -; SSE2-NEXT: movdqa %xmm3, %xmm5 -; SSE2-NEXT: psrld %xmm2, %xmm5 -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7] -; SSE2-NEXT: movdqa %xmm3, %xmm4 -; SSE2-NEXT: psrld %xmm2, %xmm4 -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7] -; SSE2-NEXT: psrld %xmm1, %xmm3 -; SSE2-NEXT: punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1] -; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3] -; SSE2-NEXT: andps %xmm5, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE2-NEXT: ret{{[l|q]}} +; X86-SSE2-LABEL: vec_4xi32_splat_eq: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pxor %xmm2, %xmm2 +; X86-SSE2-NEXT: pslld $23, %xmm1 +; X86-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1 +; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; X86-SSE2-NEXT: pmuludq %xmm1, %xmm0 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X86-SSE2-NEXT: pmuludq %xmm3, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm0 +; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; X86-SSE2-NEXT: retl ; ; AVX2-LABEL: vec_4xi32_splat_eq: ; AVX2: # %bb.0: ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1] -; AVX2-NEXT: vpsrlvd %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 ; AVX2-NEXT: ret{{[l|q]}} +; +; X64-SSE2-LABEL: vec_4xi32_splat_eq: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: pxor %xmm2, %xmm2 +; X64-SSE2-NEXT: pslld $23, %xmm1 +; 
X64-SSE2-NEXT: paddd {{.*}}(%rip), %xmm1 +; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; X64-SSE2-NEXT: pmuludq %xmm1, %xmm0 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X64-SSE2-NEXT: pmuludq %xmm3, %xmm1 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; X64-SSE2-NEXT: retq %t0 = lshr <4 x i32> , %y %t1 = and <4 x i32> %t0, %x %res = icmp eq <4 x i32> %t1, @@ -594,37 +582,48 @@ } define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) nounwind { -; SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq: -; SSE2: # %bb.0: -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7] -; SSE2-NEXT: movdqa {{.*#+}} xmm3 = <1,1,u,1> -; SSE2-NEXT: movdqa %xmm3, %xmm4 -; SSE2-NEXT: psrld %xmm2, %xmm4 -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[0,1,1,1,4,5,6,7] -; SSE2-NEXT: movdqa %xmm3, %xmm5 -; SSE2-NEXT: psrld %xmm2, %xmm5 -; SSE2-NEXT: punpcklqdq {{.*#+}} xmm5 = xmm5[0],xmm4[0] -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7] -; SSE2-NEXT: movdqa %xmm3, %xmm4 -; SSE2-NEXT: psrld %xmm2, %xmm4 -; SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7] -; SSE2-NEXT: psrld %xmm1, %xmm3 -; SSE2-NEXT: punpckhqdq {{.*#+}} xmm3 = xmm3[1],xmm4[1] -; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[0,3],xmm3[0,3] -; SSE2-NEXT: andps %xmm5, %xmm0 -; SSE2-NEXT: pxor %xmm1, %xmm1 -; SSE2-NEXT: pcmpeqd %xmm1, %xmm0 -; SSE2-NEXT: ret{{[l|q]}} +; X86-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq: +; X86-SSE2: # %bb.0: +; X86-SSE2-NEXT: pxor %xmm2, %xmm2 +; X86-SSE2-NEXT: pslld $23, %xmm1 +; X86-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1 +; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; X86-SSE2-NEXT: pmuludq 
%xmm1, %xmm0 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X86-SSE2-NEXT: pmuludq %xmm3, %xmm1 +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X86-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm0 +; X86-SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; X86-SSE2-NEXT: retl ; ; AVX2-LABEL: vec_4xi32_nonsplat_undef0_eq: ; AVX2: # %bb.0: ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1] -; AVX2-NEXT: vpsrlvd %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpsllvd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 ; AVX2-NEXT: ret{{[l|q]}} +; +; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq: +; X64-SSE2: # %bb.0: +; X64-SSE2-NEXT: pxor %xmm2, %xmm2 +; X64-SSE2-NEXT: pslld $23, %xmm1 +; X64-SSE2-NEXT: paddd {{.*}}(%rip), %xmm1 +; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,3,3] +; X64-SSE2-NEXT: pmuludq %xmm1, %xmm0 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] +; X64-SSE2-NEXT: pmuludq %xmm3, %xmm1 +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3] +; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; X64-SSE2-NEXT: pand {{.*}}(%rip), %xmm0 +; X64-SSE2-NEXT: pcmpeqd %xmm2, %xmm0 +; X64-SSE2-NEXT: retq %t0 = lshr <4 x i32> , %y %t1 = and <4 x i32> %t0, %x %res = icmp eq <4 x i32> %t1, @@ -713,20 +712,19 @@ ; X86-LABEL: scalar_i8_signbit_ne: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movb $-128, %al -; X86-NEXT: shrb %cl, %al -; X86-NEXT: testb %al, {{[0-9]+}}(%esp) -; X86-NEXT: setne %al +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shlb %cl, %al +; X86-NEXT: shrb $7, %al ; X86-NEXT: 
retl ; ; X64-LABEL: scalar_i8_signbit_ne: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movb $-128, %al +; X64-NEXT: movl %edi, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shrb %cl, %al -; X64-NEXT: testb %dil, %al -; X64-NEXT: setne %al +; X64-NEXT: shlb %cl, %al +; X64-NEXT: shrb $7, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = lshr i8 128, %y %t1 = and i8 %t0, %x Index: test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll =================================================================== --- test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll +++ test/CodeGen/X86/hoist-and-by-const-from-shl-in-eqcmp-zero.ll @@ -23,19 +23,18 @@ ; X86-LABEL: scalar_i8_signbit_eq: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movb $-128, %al -; X86-NEXT: shlb %cl, %al -; X86-NEXT: testb %al, {{[0-9]+}}(%esp) +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shrb %cl, %al +; X86-NEXT: testb $-128, %al ; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: scalar_i8_signbit_eq: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movb $-128, %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shlb %cl, %al -; X64-NEXT: testb %dil, %al +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: testb $-128, %dil ; X64-NEXT: sete %al ; X64-NEXT: retq %t0 = shl i8 128, %y @@ -68,19 +67,18 @@ ; X86-LABEL: scalar_i8_bitsinmiddle_eq: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movb $24, %al -; X86-NEXT: shlb %cl, %al -; X86-NEXT: testb %al, {{[0-9]+}}(%esp) +; X86-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-NEXT: shrb %cl, %al +; X86-NEXT: testb $24, %al ; X86-NEXT: sete %al ; X86-NEXT: retl ; ; X64-LABEL: scalar_i8_bitsinmiddle_eq: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movb $24, %al ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shlb %cl, %al -; X64-NEXT: testb %dil, %al +; X64-NEXT: shrb %cl, %dil +; X64-NEXT: testb $24, 
%dil ; X64-NEXT: sete %al ; X64-NEXT: retq %t0 = shl i8 24, %y @@ -95,36 +93,36 @@ ; X86-BMI1-LABEL: scalar_i16_signbit_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $-32768, %eax # imm = 0x8000 -; X86-BMI1-NEXT: shll %cl, %eax -; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testl $32768, %eax # imm = 0x8000 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i16_signbit_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $-32768, %ecx # imm = 0x8000 -; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testl $32768, %eax # imm = 0x8000 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i16_signbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $-32768, %eax # imm = 0x8000 +; X64-BMI1-NEXT: movzwl %di, %eax ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shll %cl, %eax -; X64-BMI1-NEXT: testw %di, %ax +; X64-BMI1-NEXT: shrl %cl, %eax +; X64-BMI1-NEXT: testl $32768, %eax # imm = 0x8000 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i16_signbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $-32768, %eax # imm = 0x8000 -; X64-BMI2-NEXT: shlxl %esi, %eax, %eax -; X64-BMI2-NEXT: testw %di, %ax +; X64-BMI2-NEXT: movzwl %di, %eax +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: testl $32768, %eax # imm = 0x8000 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = shl i16 32768, %y @@ -157,36 +155,36 @@ ; X86-BMI1-LABEL: scalar_i16_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0 -; 
X86-BMI1-NEXT: shll %cl, %eax -; X86-BMI1-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i16_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: -; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $4080, %ecx # imm = 0xFF0 -; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testw %ax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: movzwl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI2-NEXT: shrxl %ecx, %eax, %eax +; X86-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i16_bitsinmiddle_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $4080, %eax # imm = 0xFF0 +; X64-BMI1-NEXT: movzwl %di, %eax ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shll %cl, %eax -; X64-BMI1-NEXT: testw %di, %ax +; X64-BMI1-NEXT: shrl %cl, %eax +; X64-BMI1-NEXT: testl $4080, %eax # imm = 0xFF0 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i16_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $4080, %eax # imm = 0xFF0 -; X64-BMI2-NEXT: shlxl %esi, %eax, %eax -; X64-BMI2-NEXT: testw %di, %ax +; X64-BMI2-NEXT: movzwl %di, %eax +; X64-BMI2-NEXT: shrxl %esi, %eax, %eax +; X64-BMI2-NEXT: testl $4080, %eax # imm = 0xFF0 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = shl i16 4080, %y @@ -201,36 +199,33 @@ ; X86-BMI1-LABEL: scalar_i32_signbit_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X86-BMI1-NEXT: shll %cl, %eax -; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X86-BMI1-NEXT: sete 
%al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i32_signbit_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $-2147483648, %ecx # imm = 0x80000000 -; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i32_signbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $-2147483648, %eax # imm = 0x80000000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shll %cl, %eax -; X64-BMI1-NEXT: testl %edi, %eax +; X64-BMI1-NEXT: shrl %cl, %edi +; X64-BMI1-NEXT: testl $-2147483648, %edi # imm = 0x80000000 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i32_signbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $-2147483648, %eax # imm = 0x80000000 -; X64-BMI2-NEXT: shlxl %esi, %eax, %eax -; X64-BMI2-NEXT: testl %edi, %eax +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: testl $-2147483648, %eax # imm = 0x80000000 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = shl i32 2147483648, %y @@ -263,36 +258,33 @@ ; X86-BMI1-LABEL: scalar_i32_bitsinmiddle_eq: ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00 -; X86-BMI1-NEXT: shll %cl, %eax -; X86-BMI1-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: retl ; ; X86-BMI2-LABEL: scalar_i32_bitsinmiddle_eq: ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al -; X86-BMI2-NEXT: movl $16776960, %ecx # imm = 0xFFFF00 -; X86-BMI2-NEXT: shlxl %eax, %ecx, %eax -; X86-BMI2-NEXT: testl %eax, {{[0-9]+}}(%esp) +; X86-BMI2-NEXT: shrxl %eax, 
{{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i32_bitsinmiddle_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movl %esi, %ecx -; X64-BMI1-NEXT: movl $16776960, %eax # imm = 0xFFFF00 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-BMI1-NEXT: shll %cl, %eax -; X64-BMI1-NEXT: testl %edi, %eax +; X64-BMI1-NEXT: shrl %cl, %edi +; X64-BMI1-NEXT: testl $16776960, %edi # imm = 0xFFFF00 ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i32_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movl $16776960, %eax # imm = 0xFFFF00 -; X64-BMI2-NEXT: shlxl %esi, %eax, %eax -; X64-BMI2-NEXT: testl %edi, %eax +; X64-BMI2-NEXT: shrxl %esi, %edi, %eax +; X64-BMI2-NEXT: testl $16776960, %eax # imm = 0xFFFF00 ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = shl i32 16776960, %y @@ -304,35 +296,43 @@ ; i64 scalar define i1 @scalar_i64_signbit_eq(i64 %x, i64 %y) nounwind { -; X86-LABEL: scalar_i64_signbit_eq: -; X86: # %bb.0: -; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: xorl %eax, %eax -; X86-NEXT: movl $-2147483648, %edx # imm = 0x80000000 -; X86-NEXT: shldl %cl, %eax, %edx -; X86-NEXT: testb $32, %cl -; X86-NEXT: cmovnel %eax, %edx -; X86-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-NEXT: orl $0, %edx -; X86-NEXT: sete %al -; X86-NEXT: retl +; X86-BMI1-LABEL: scalar_i64_signbit_eq: +; X86-BMI1: # %bb.0: +; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: shrl %cl, %eax +; X86-BMI1-NEXT: xorl %edx, %edx +; X86-BMI1-NEXT: testb $32, %cl +; X86-BMI1-NEXT: cmovel %eax, %edx +; X86-BMI1-NEXT: testl $-2147483648, %edx # imm = 0x80000000 +; X86-BMI1-NEXT: sete %al +; X86-BMI1-NEXT: retl +; +; X86-BMI2-LABEL: scalar_i64_signbit_eq: +; X86-BMI2: # %bb.0: +; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %al +; X86-BMI2-NEXT: shrxl %eax, {{[0-9]+}}(%esp), %ecx +; X86-BMI2-NEXT: xorl %edx, %edx 
+; X86-BMI2-NEXT: testb $32, %al +; X86-BMI2-NEXT: cmovel %ecx, %edx +; X86-BMI2-NEXT: testl $-2147483648, %edx # imm = 0x80000000 +; X86-BMI2-NEXT: sete %al +; X86-BMI2-NEXT: retl ; ; X64-BMI1-LABEL: scalar_i64_signbit_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movq %rsi, %rcx -; X64-BMI1-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1-NEXT: shlq %cl, %rax -; X64-BMI1-NEXT: testq %rdi, %rax -; X64-BMI1-NEXT: sete %al +; X64-BMI1-NEXT: shrq %cl, %rdi +; X64-BMI1-NEXT: btq $63, %rdi +; X64-BMI1-NEXT: setae %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i64_signbit_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movabsq $-9223372036854775808, %rax # imm = 0x8000000000000000 -; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax -; X64-BMI2-NEXT: testq %rdi, %rax -; X64-BMI2-NEXT: sete %al +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: btq $63, %rax +; X64-BMI2-NEXT: setae %al ; X64-BMI2-NEXT: retq %t0 = shl i64 9223372036854775808, %y %t1 = and i64 %t0, %x @@ -395,17 +395,18 @@ ; X86-BMI1: # %bb.0: ; X86-BMI1-NEXT: pushl %esi ; X86-BMI1-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI1-NEXT: movl $-65536, %eax # imm = 0xFFFF0000 -; X86-BMI1-NEXT: movl $65535, %edx # imm = 0xFFFF -; X86-BMI1-NEXT: shldl %cl, %eax, %edx -; X86-BMI1-NEXT: shll %cl, %eax -; X86-BMI1-NEXT: xorl %esi, %esi +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI1-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI1-NEXT: movl %edx, %esi +; X86-BMI1-NEXT: shrl %cl, %esi +; X86-BMI1-NEXT: shrdl %cl, %edx, %eax +; X86-BMI1-NEXT: xorl %edx, %edx ; X86-BMI1-NEXT: testb $32, %cl -; X86-BMI1-NEXT: cmovnel %eax, %edx -; X86-BMI1-NEXT: cmovel %eax, %esi -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI1-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI1-NEXT: orl %edx, %esi +; X86-BMI1-NEXT: cmovnel %esi, %eax +; X86-BMI1-NEXT: cmovel %esi, %edx +; X86-BMI1-NEXT: andl $-65536, %eax # imm = 0xFFFF0000 +; X86-BMI1-NEXT: 
movzwl %dx, %ecx +; X86-BMI1-NEXT: orl %eax, %ecx ; X86-BMI1-NEXT: sete %al ; X86-BMI1-NEXT: popl %esi ; X86-BMI1-NEXT: retl @@ -414,17 +415,17 @@ ; X86-BMI2: # %bb.0: ; X86-BMI2-NEXT: pushl %esi ; X86-BMI2-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-BMI2-NEXT: movl $-65536, %eax # imm = 0xFFFF0000 -; X86-BMI2-NEXT: movl $65535, %edx # imm = 0xFFFF -; X86-BMI2-NEXT: shldl %cl, %eax, %edx -; X86-BMI2-NEXT: shlxl %ecx, %eax, %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-BMI2-NEXT: shrdl %cl, %edx, %eax +; X86-BMI2-NEXT: shrxl %ecx, %edx, %edx ; X86-BMI2-NEXT: xorl %esi, %esi ; X86-BMI2-NEXT: testb $32, %cl -; X86-BMI2-NEXT: cmovnel %eax, %edx -; X86-BMI2-NEXT: cmovel %eax, %esi -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %edx -; X86-BMI2-NEXT: andl {{[0-9]+}}(%esp), %esi -; X86-BMI2-NEXT: orl %edx, %esi +; X86-BMI2-NEXT: cmovnel %edx, %eax +; X86-BMI2-NEXT: cmovel %edx, %esi +; X86-BMI2-NEXT: andl $-65536, %eax # imm = 0xFFFF0000 +; X86-BMI2-NEXT: movzwl %si, %ecx +; X86-BMI2-NEXT: orl %eax, %ecx ; X86-BMI2-NEXT: sete %al ; X86-BMI2-NEXT: popl %esi ; X86-BMI2-NEXT: retl @@ -432,18 +433,18 @@ ; X64-BMI1-LABEL: scalar_i64_bitsinmiddle_eq: ; X64-BMI1: # %bb.0: ; X64-BMI1-NEXT: movq %rsi, %rcx -; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 ; X64-BMI1-NEXT: # kill: def $cl killed $cl killed $rcx -; X64-BMI1-NEXT: shlq %cl, %rax -; X64-BMI1-NEXT: testq %rdi, %rax +; X64-BMI1-NEXT: shrq %cl, %rdi +; X64-BMI1-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 +; X64-BMI1-NEXT: testq %rax, %rdi ; X64-BMI1-NEXT: sete %al ; X64-BMI1-NEXT: retq ; ; X64-BMI2-LABEL: scalar_i64_bitsinmiddle_eq: ; X64-BMI2: # %bb.0: -; X64-BMI2-NEXT: movabsq $281474976645120, %rax # imm = 0xFFFFFFFF0000 -; X64-BMI2-NEXT: shlxq %rsi, %rax, %rax -; X64-BMI2-NEXT: testq %rdi, %rax +; X64-BMI2-NEXT: shrxq %rsi, %rdi, %rax +; X64-BMI2-NEXT: movabsq $281474976645120, %rcx # imm = 0xFFFFFFFF0000 +; X64-BMI2-NEXT: testq 
%rcx, %rax ; X64-BMI2-NEXT: sete %al ; X64-BMI2-NEXT: retq %t0 = shl i64 281474976645120, %y @@ -459,45 +460,57 @@ define <4 x i1> @vec_4xi32_splat_eq(<4 x i32> %x, <4 x i32> %y) nounwind { ; X86-SSE2-LABEL: vec_4xi32_splat_eq: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pslld $23, %xmm1 -; X86-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1 -; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] -; X86-SSE2-NEXT: pmuludq %xmm2, %xmm3 -; X86-SSE2-NEXT: pmuludq %xmm1, %xmm2 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] -; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pxor %xmm1, %xmm1 -; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 +; X86-SSE2-NEXT: pxor %xmm3, %xmm3 +; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7] +; X86-SSE2-NEXT: movdqa %xmm0, %xmm4 +; X86-SSE2-NEXT: psrld %xmm2, %xmm4 +; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm1[0,1,1,1,4,5,6,7] +; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X86-SSE2-NEXT: psrld %xmm5, %xmm2 +; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm1[2,3,3,3,4,5,6,7] +; X86-SSE2-NEXT: movdqa %xmm0, %xmm5 +; X86-SSE2-NEXT: psrld %xmm4, %xmm5 +; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7] +; X86-SSE2-NEXT: psrld %xmm1, %xmm0 +; X86-SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm5[1] +; X86-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3] +; X86-SSE2-NEXT: andps {{\.LCPI.*}}, %xmm2 +; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 +; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 ; X86-SSE2-NEXT: retl ; ; AVX2-LABEL: vec_4xi32_splat_eq: ; AVX2: # %bb.0: ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1] -; AVX2-NEXT: vpsllvd %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpxor %xmm1, 
%xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 ; AVX2-NEXT: ret{{[l|q]}} ; ; X64-SSE2-LABEL: vec_4xi32_splat_eq: ; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: pslld $23, %xmm1 -; X64-SSE2-NEXT: paddd {{.*}}(%rip), %xmm1 -; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 -; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] -; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] -; X64-SSE2-NEXT: pmuludq %xmm2, %xmm3 -; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2 -; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] -; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] -; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pxor %xmm1, %xmm1 -; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 +; X64-SSE2-NEXT: pxor %xmm3, %xmm3 +; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7] +; X64-SSE2-NEXT: movdqa %xmm0, %xmm4 +; X64-SSE2-NEXT: psrld %xmm2, %xmm4 +; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm1[0,1,1,1,4,5,6,7] +; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X64-SSE2-NEXT: psrld %xmm5, %xmm2 +; X64-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0] +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm1[2,3,3,3,4,5,6,7] +; X64-SSE2-NEXT: movdqa %xmm0, %xmm5 +; X64-SSE2-NEXT: psrld %xmm4, %xmm5 +; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7] +; X64-SSE2-NEXT: psrld %xmm1, %xmm0 +; X64-SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm5[1] +; X64-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3] +; X64-SSE2-NEXT: andps {{.*}}(%rip), %xmm2 +; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 +; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 ; X64-SSE2-NEXT: retq %t0 = shl <4 x i32> , %y %t1 = and <4 x i32> %t0, %x @@ -559,45 +572,57 @@ define <4 x i1> @vec_4xi32_nonsplat_undef0_eq(<4 x i32> %x, <4 x i32> %y) 
nounwind { ; X86-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq: ; X86-SSE2: # %bb.0: -; X86-SSE2-NEXT: pslld $23, %xmm1 -; X86-SSE2-NEXT: paddd {{\.LCPI.*}}, %xmm1 -; X86-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 -; X86-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] -; X86-SSE2-NEXT: pmuludq %xmm2, %xmm3 -; X86-SSE2-NEXT: pmuludq %xmm1, %xmm2 -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] -; X86-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] -; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X86-SSE2-NEXT: pand %xmm1, %xmm0 -; X86-SSE2-NEXT: pxor %xmm1, %xmm1 -; X86-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 +; X86-SSE2-NEXT: pxor %xmm3, %xmm3 +; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7] +; X86-SSE2-NEXT: movdqa %xmm0, %xmm4 +; X86-SSE2-NEXT: psrld %xmm2, %xmm4 +; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm1[0,1,1,1,4,5,6,7] +; X86-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X86-SSE2-NEXT: psrld %xmm5, %xmm2 +; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0] +; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm1[2,3,3,3,4,5,6,7] +; X86-SSE2-NEXT: movdqa %xmm0, %xmm5 +; X86-SSE2-NEXT: psrld %xmm4, %xmm5 +; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7] +; X86-SSE2-NEXT: psrld %xmm1, %xmm0 +; X86-SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm5[1] +; X86-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3] +; X86-SSE2-NEXT: andps {{\.LCPI.*}}, %xmm2 +; X86-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 +; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 ; X86-SSE2-NEXT: retl ; ; AVX2-LABEL: vec_4xi32_nonsplat_undef0_eq: ; AVX2: # %bb.0: ; AVX2-NEXT: vpbroadcastd {{.*#+}} xmm2 = [1,1,1,1] -; AVX2-NEXT: vpsllvd %xmm1, %xmm2, %xmm1 -; AVX2-NEXT: vpand %xmm0, %xmm1, %xmm0 -; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 -; AVX2-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX2-NEXT: vpsrlvd %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpand 
%xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpcmpeqd %xmm3, %xmm0, %xmm0 ; AVX2-NEXT: ret{{[l|q]}} ; ; X64-SSE2-LABEL: vec_4xi32_nonsplat_undef0_eq: ; X64-SSE2: # %bb.0: -; X64-SSE2-NEXT: pslld $23, %xmm1 -; X64-SSE2-NEXT: paddd {{.*}}(%rip), %xmm1 -; X64-SSE2-NEXT: cvttps2dq %xmm1, %xmm1 -; X64-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [1,1,1,1] -; X64-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,3,3] -; X64-SSE2-NEXT: pmuludq %xmm2, %xmm3 -; X64-SSE2-NEXT: pmuludq %xmm1, %xmm2 -; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[0,2,2,3] -; X64-SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm3[0,2,2,3] -; X64-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] -; X64-SSE2-NEXT: pand %xmm1, %xmm0 -; X64-SSE2-NEXT: pxor %xmm1, %xmm1 -; X64-SSE2-NEXT: pcmpeqd %xmm1, %xmm0 +; X64-SSE2-NEXT: pxor %xmm3, %xmm3 +; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm2 = xmm1[2,3,3,3,4,5,6,7] +; X64-SSE2-NEXT: movdqa %xmm0, %xmm4 +; X64-SSE2-NEXT: psrld %xmm2, %xmm4 +; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm5 = xmm1[0,1,1,1,4,5,6,7] +; X64-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X64-SSE2-NEXT: psrld %xmm5, %xmm2 +; X64-SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm4[0] +; X64-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] +; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm4 = xmm1[2,3,3,3,4,5,6,7] +; X64-SSE2-NEXT: movdqa %xmm0, %xmm5 +; X64-SSE2-NEXT: psrld %xmm4, %xmm5 +; X64-SSE2-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,1,1,1,4,5,6,7] +; X64-SSE2-NEXT: psrld %xmm1, %xmm0 +; X64-SSE2-NEXT: punpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm5[1] +; X64-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,3],xmm0[0,3] +; X64-SSE2-NEXT: andps {{.*}}(%rip), %xmm2 +; X64-SSE2-NEXT: pcmpeqd %xmm3, %xmm2 +; X64-SSE2-NEXT: movdqa %xmm2, %xmm0 ; X64-SSE2-NEXT: retq %t0 = shl <4 x i32> , %y %t1 = and <4 x i32> %t0, %x @@ -709,20 +734,19 @@ ; X86-LABEL: scalar_i8_signbit_ne: ; X86: # %bb.0: ; X86-NEXT: movb {{[0-9]+}}(%esp), %cl -; X86-NEXT: movb $-128, %al -; X86-NEXT: shlb %cl, %al -; X86-NEXT: testb %al, {{[0-9]+}}(%esp) -; X86-NEXT: setne %al +; X86-NEXT: movb 
{{[0-9]+}}(%esp), %al +; X86-NEXT: shrb %cl, %al +; X86-NEXT: shrb $7, %al ; X86-NEXT: retl ; ; X64-LABEL: scalar_i8_signbit_ne: ; X64: # %bb.0: ; X64-NEXT: movl %esi, %ecx -; X64-NEXT: movb $-128, %al +; X64-NEXT: movl %edi, %eax ; X64-NEXT: # kill: def $cl killed $cl killed $ecx -; X64-NEXT: shlb %cl, %al -; X64-NEXT: testb %dil, %al -; X64-NEXT: setne %al +; X64-NEXT: shrb %cl, %al +; X64-NEXT: shrb $7, %al +; X64-NEXT: # kill: def $al killed $al killed $eax ; X64-NEXT: retq %t0 = shl i8 128, %y %t1 = and i8 %t0, %x