Index: include/llvm/CodeGen/SelectionDAG.h
===================================================================
--- include/llvm/CodeGen/SelectionDAG.h
+++ include/llvm/CodeGen/SelectionDAG.h
@@ -570,6 +570,8 @@
                       bool isOpaque = false) {
     return getConstant(Val, DL, VT, true, isOpaque);
   }
+
+  SDValue getBooleanTrueConstant(const SDLoc &DL, EVT VT);
   /// @}
 
   /// \brief Create a ConstantFPSDNode wrapping a constant value.
Index: lib/CodeGen/SelectionDAG/SelectionDAG.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1078,19 +1078,19 @@
 }
 
 SDValue SelectionDAG::getLogicalNOT(const SDLoc &DL, SDValue Val, EVT VT) {
-  EVT EltVT = VT.getScalarType();
-  SDValue TrueValue;
+  SDValue TrueValue = getBooleanTrueConstant(DL, VT);
+  return getNode(ISD::XOR, DL, VT, Val, TrueValue);
+}
+
+SDValue SelectionDAG::getBooleanTrueConstant(const SDLoc &DL, EVT VT) {
   switch (TLI->getBooleanContents(VT)) {
-  case TargetLowering::ZeroOrOneBooleanContent:
-  case TargetLowering::UndefinedBooleanContent:
-    TrueValue = getConstant(1, DL, VT);
-    break;
-  case TargetLowering::ZeroOrNegativeOneBooleanContent:
-    TrueValue = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), DL,
-                            VT);
-    break;
+  case TargetLowering::ZeroOrOneBooleanContent:
+  case TargetLowering::UndefinedBooleanContent:
+    return getConstant(1, DL, VT);
+  case TargetLowering::ZeroOrNegativeOneBooleanContent:
+    return getAllOnesConstant(DL, VT);
   }
-  return getNode(ISD::XOR, DL, VT, Val, TrueValue);
+  llvm_unreachable("Unexpected boolean content enum!");
 }
 
 SDValue SelectionDAG::getConstant(uint64_t Val, const SDLoc &DL, EVT VT,
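Note on the helper's semantics: whether "true" is 1 or all-ones depends on the target's boolean contents, and most SIMD targets report ZeroOrNegativeOneBooleanContent for vector compares, so a provably-true vector setcc has to fold to an all-ones splat rather than splat(1). A minimal IR illustration of the behavior the helper enables (hypothetical function name, assuming such a target):

define <4 x i32> @always_true(<4 x i32> %x) {
  ; X <=u UINT_MAX holds in every lane, so SimplifySetCC may fold the
  ; compare away; the sign-extended mask must then be all-ones, which is
  ; exactly what getBooleanTrueConstant produces for sign-mask booleans.
  %cmp = icmp ule <4 x i32> %x, <i32 -1, i32 -1, i32 -1, i32 -1>
  %ext = sext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ext
}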
Index: lib/CodeGen/SelectionDAG/TargetLowering.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1486,18 +1486,13 @@
   case ISD::SETFALSE:
   case ISD::SETFALSE2: return DAG.getConstant(0, dl, VT);
   case ISD::SETTRUE:
-  case ISD::SETTRUE2: {
-    TargetLowering::BooleanContent Cnt =
-        getBooleanContents(N0->getValueType(0));
-    return DAG.getConstant(
-        Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, dl,
-        VT);
-  }
+  case ISD::SETTRUE2: return DAG.getBooleanTrueConstant(dl, VT);
   }
 
   // Ensure that the constant occurs on the RHS and fold constant comparisons.
+  // TODO: Handle non-splat vector constants. All undef causes trouble.
   ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
-  if (isa<ConstantSDNode>(N0.getNode()) &&
+  if (isConstOrConstSplat(N0) &&
       (DCI.isBeforeLegalizeOps() ||
        isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
     return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
@@ -1872,9 +1867,15 @@
                               Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
       }
     }
+  }
+
+  // These simplifications apply to splat vectors as well.
+  // TODO: Handle more splat vector cases.
+  if (auto *N1C = isConstOrConstSplat(N1)) {
+    const APInt &C1 = N1C->getAPIntValue();
 
     APInt MinVal, MaxVal;
-    unsigned OperandBitSize = N1C->getValueType(0).getSizeInBits();
+    unsigned OperandBitSize = N1C->getValueType(0).getScalarSizeInBits();
     if (ISD::isSignedIntSetCC(Cond)) {
       MinVal = APInt::getSignedMinValue(OperandBitSize);
       MaxVal = APInt::getSignedMaxValue(OperandBitSize);
@@ -1887,47 +1888,51 @@
     if (Cond == ISD::SETGE || Cond == ISD::SETUGE) {
       // X >= MIN --> true
       if (C1 == MinVal)
-        return DAG.getConstant(1, dl, VT);
-
-      // X >= C0 --> X > (C0 - 1)
-      APInt C = C1 - 1;
-      ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
-      if ((DCI.isBeforeLegalizeOps() ||
-           isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
-          (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
-                                isLegalICmpImmediate(C.getSExtValue())))) {
-        return DAG.getSetCC(dl, VT, N0,
-                            DAG.getConstant(C, dl, N1.getValueType()),
-                            NewCC);
+        return DAG.getBooleanTrueConstant(dl, VT);
+
+      if (!VT.isVector()) { // TODO: Support this for vectors.
+        // X >= C0 --> X > (C0 - 1)
+        APInt C = C1 - 1;
+        ISD::CondCode NewCC = (Cond == ISD::SETGE) ? ISD::SETGT : ISD::SETUGT;
+        if ((DCI.isBeforeLegalizeOps() ||
+             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+            (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
+                                  isLegalICmpImmediate(C.getSExtValue())))) {
+          return DAG.getSetCC(dl, VT, N0,
+                              DAG.getConstant(C, dl, N1.getValueType()),
+                              NewCC);
+        }
       }
     }
 
     if (Cond == ISD::SETLE || Cond == ISD::SETULE) {
       // X <= MAX --> true
       if (C1 == MaxVal)
-        return DAG.getConstant(1, dl, VT);
+        return DAG.getBooleanTrueConstant(dl, VT);
 
       // X <= C0 --> X < (C0 + 1)
-      APInt C = C1 + 1;
-      ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
-      if ((DCI.isBeforeLegalizeOps() ||
-           isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
-          (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
-                                isLegalICmpImmediate(C.getSExtValue())))) {
-        return DAG.getSetCC(dl, VT, N0,
-                            DAG.getConstant(C, dl, N1.getValueType()),
-                            NewCC);
+      if (!VT.isVector()) { // TODO: Support this for vectors.
+        APInt C = C1 + 1;
+        ISD::CondCode NewCC = (Cond == ISD::SETLE) ? ISD::SETLT : ISD::SETULT;
+        if ((DCI.isBeforeLegalizeOps() ||
+             isCondCodeLegal(NewCC, VT.getSimpleVT())) &&
+            (!N1C->isOpaque() || (N1C->isOpaque() && C.getBitWidth() <= 64 &&
+                                  isLegalICmpImmediate(C.getSExtValue())))) {
+          return DAG.getSetCC(dl, VT, N0,
+                              DAG.getConstant(C, dl, N1.getValueType()),
+                              NewCC);
+        }
       }
     }
 
     if ((Cond == ISD::SETLT || Cond == ISD::SETULT) && C1 == MinVal)
-      return DAG.getConstant(0, dl, VT);  // X < MIN --> false
+      return DAG.getConstant(0, dl, VT);         // X < MIN --> false
     if ((Cond == ISD::SETGE || Cond == ISD::SETUGE) && C1 == MinVal)
-      return DAG.getConstant(1, dl, VT);  // X >= MIN --> true
+      return DAG.getBooleanTrueConstant(dl, VT); // X >= MIN --> true
     if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal)
-      return DAG.getConstant(0, dl, VT);  // X > MAX --> false
+      return DAG.getConstant(0, dl, VT);         // X > MAX --> false
    if ((Cond == ISD::SETLE || Cond == ISD::SETULE) && C1 == MaxVal)
-      return DAG.getConstant(1, dl, VT);  // X <= MAX --> true
+      return DAG.getBooleanTrueConstant(dl, VT); // X <= MAX --> true
 
     // Canonicalize setgt X, Min --> setne X, Min
     if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MinVal)
@@ -1965,6 +1970,12 @@
                                              N1.getValueType());
         return DAG.getSetCC(dl, VT, N0, ConstMinusOne, ISD::SETGT);
       }
+    }
+
+    // Back to non-vector simplifications.
+    // TODO: Can we do these for vector splats?
+    if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
+      const APInt &C1 = N1C->getAPIntValue();
 
     // Fold bit comparisons when we can.
    if ((Cond == ISD::SETEQ || Cond == ISD::SETNE) &&
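The switch from isa<ConstantSDNode> to isConstOrConstSplat is what lets the min/max folds above fire for vector compares. A small IR sketch of a case that now folds entirely in SimplifySetCC (hypothetical function name): unsigned X >= 0 hits the "X >= MIN --> true" path, so no compare survives to instruction selection.

define <8 x i16> @uge_zero(<8 x i16> %x) {
  ; Unsigned "X >= 0" is trivially true, so the setcc folds to an
  ; all-ones mask instead of reaching the target as a vector compare.
  %cmp = icmp uge <8 x i16> %x, zeroinitializer
  %ext = sext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %ext
}

This is also why the AArch64 tests below stop comparing against zeroinitializer: the zero-based forms now fold or canonicalize away, so the tests switch to non-trivial splat constants to keep exercising cmhs/cmhi.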
Index: test/CodeGen/AArch64/arm64-neon-compare-instructions.ll
===================================================================
--- test/CodeGen/AArch64/arm64-neon-compare-instructions.ll
+++ test/CodeGen/AArch64/arm64-neon-compare-instructions.ll
@@ -858,114 +858,116 @@
 }
 
 define <8 x i8> @cmhsz8xi8(<8 x i8> %A) {
-;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK: movi v[[ZERO:[0-9]+]].8b, #2
 ;CHECK-NEXT: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, v[[ZERO]].8b
-  %tmp3 = icmp uge <8 x i8> %A, zeroinitializer;
+  %tmp3 = icmp uge <8 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
   ret <8 x i8> %tmp4
 }
 
 define <16 x i8> @cmhsz16xi8(<16 x i8> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK: movi v[[ZERO:[0-9]+]].16b, #2
 ;CHECK-NEXT: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, v[[ZERO]].16b
-  %tmp3 = icmp uge <16 x i8> %A, zeroinitializer;
+  %tmp3 = icmp uge <16 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
   ret <16 x i8> %tmp4
 }
 
 define <4 x i16> @cmhsz4xi16(<4 x i16> %A) {
-;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK: movi v[[ZERO:[0-9]+]].4h, #2
 ;CHECK-NEXT: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v[[ZERO]].4h
-  %tmp3 = icmp uge <4 x i16> %A, zeroinitializer;
+  %tmp3 = icmp uge <4 x i16> %A, <i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
   ret <4 x i16> %tmp4
 }
 
 define <8 x i16> @cmhsz8xi16(<8 x i16> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK: movi v[[ZERO:[0-9]+]].8h, #2
 ;CHECK-NEXT: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v[[ZERO]].8h
-  %tmp3 = icmp uge <8 x i16> %A, zeroinitializer;
+  %tmp3 = icmp uge <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
   ret <8 x i16> %tmp4
 }
 
 define <2 x i32> @cmhsz2xi32(<2 x i32> %A) {
-;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK: movi v[[ZERO:[0-9]+]].2s, #2
 ;CHECK-NEXT: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, v[[ZERO]].2s
-  %tmp3 = icmp uge <2 x i32> %A, zeroinitializer;
+  %tmp3 = icmp uge <2 x i32> %A, <i32 2, i32 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
 }
 
 define <4 x i32> @cmhsz4xi32(<4 x i32> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK: movi v[[ZERO:[0-9]+]].4s, #2
 ;CHECK-NEXT: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, v[[ZERO]].4s
-  %tmp3 = icmp uge <4 x i32> %A, zeroinitializer;
+  %tmp3 = icmp uge <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
 }
 
 define <2 x i64> @cmhsz2xi64(<2 x i64> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK: orr w[[TWO:[0-9]+]], wzr, #0x2
+;CHECK-NEXT: dup v[[ZERO:[0-9]+]].2d, x[[TWO]]
 ;CHECK-NEXT: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, v[[ZERO]].2d
-  %tmp3 = icmp uge <2 x i64> %A, zeroinitializer;
+  %tmp3 = icmp uge <2 x i64> %A, <i64 2, i64 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
 }
 
 define <8 x i8> @cmhiz8xi8(<8 x i8> %A) {
-;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK: movi v[[ZERO:[0-9]+]].8b, #1
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, v[[ZERO]].8b
-  %tmp3 = icmp ugt <8 x i8> %A, zeroinitializer;
+  %tmp3 = icmp ugt <8 x i8> %A, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
   ret <8 x i8> %tmp4
 }
 
 define <16 x i8> @cmhiz16xi8(<16 x i8> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK: movi v[[ZERO:[0-9]+]].16b, #1
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, v[[ZERO]].16b
-  %tmp3 = icmp ugt <16 x i8> %A, zeroinitializer;
+  %tmp3 = icmp ugt <16 x i8> %A, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
   ret <16 x i8> %tmp4
 }
 
 define <4 x i16> @cmhiz4xi16(<4 x i16> %A) {
-;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK: movi v[[ZERO:[0-9]+]].4h, #1
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, v[[ZERO]].4h
-  %tmp3 = icmp ugt <4 x i16> %A, zeroinitializer;
+  %tmp3 = icmp ugt <4 x i16> %A, <i16 1, i16 1, i16 1, i16 1>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
   ret <4 x i16> %tmp4
 }
 
 define <8 x i16> @cmhiz8xi16(<8 x i16> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK: movi v[[ZERO:[0-9]+]].8h, #1
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, v[[ZERO]].8h
-  %tmp3 = icmp ugt <8 x i16> %A, zeroinitializer;
+  %tmp3 = icmp ugt <8 x i16> %A, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
   ret <8 x i16> %tmp4
 }
 
 define <2 x i32> @cmhiz2xi32(<2 x i32> %A) {
-;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK: movi v[[ZERO:[0-9]+]].2s, #1
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, v[[ZERO]].2s
-  %tmp3 = icmp ugt <2 x i32> %A, zeroinitializer;
+  %tmp3 = icmp ugt <2 x i32> %A, <i32 1, i32 1>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
 }
 
 define <4 x i32> @cmhiz4xi32(<4 x i32> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK: movi v[[ZERO:[0-9]+]].4s, #1
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, v[[ZERO]].4s
-  %tmp3 = icmp ugt <4 x i32> %A, zeroinitializer;
+  %tmp3 = icmp ugt <4 x i32> %A, <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
 }
 
 define <2 x i64> @cmhiz2xi64(<2 x i64> %A) {
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK: orr w[[ONE:[0-9]+]], wzr, #0x1
+;CHECK-NEXT: dup v[[ZERO:[0-9]+]].2d, x[[ONE]]
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, v[[ZERO]].2d
-  %tmp3 = icmp ugt <2 x i64> %A, zeroinitializer;
+  %tmp3 = icmp ugt <2 x i64> %A, <i64 1, i64 1>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
 }
@@ -1043,9 +1045,9 @@
 
 define <8 x i8> @cmloz8xi8(<8 x i8> %A) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK: movi v[[ZERO:[0-9]+]].8b, #2
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.8b, v[[ZERO]].8b, {{v[0-9]+}}.8b
-  %tmp3 = icmp ult <8 x i8> %A, zeroinitializer;
+  %tmp3 = icmp ult <8 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
   ret <8 x i8> %tmp4
 }
@@ -1053,9 +1055,9 @@
 
 define <16 x i8> @cmloz16xi8(<16 x i8> %A) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK: movi v[[ZERO:[0-9]+]].16b, #2
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.16b, v[[ZERO]].16b, v0.16b
-  %tmp3 = icmp ult <16 x i8> %A, zeroinitializer;
+  %tmp3 = icmp ult <16 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
   ret <16 x i8> %tmp4
 }
@@ -1063,9 +1065,9 @@
 
 define <4 x i16> @cmloz4xi16(<4 x i16> %A) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK: movi v[[ZERO:[0-9]+]].4h, #2
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.4h, v[[ZERO]].4h, v0.4h
-  %tmp3 = icmp ult <4 x i16> %A, zeroinitializer;
+  %tmp3 = icmp ult <4 x i16> %A, <i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
   ret <4 x i16> %tmp4
 }
@@ -1073,9 +1075,9 @@
 
 define <8 x i16> @cmloz8xi16(<8 x i16> %A) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK: movi v[[ZERO:[0-9]+]].8h, #2
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.8h, v[[ZERO]].8h, v0.8h
-  %tmp3 = icmp ult <8 x i16> %A, zeroinitializer;
+  %tmp3 = icmp ult <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
   ret <8 x i16> %tmp4
 }
@@ -1083,9 +1085,9 @@
 
 define <2 x i32> @cmloz2xi32(<2 x i32> %A) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-;CHECK: movi d[[ZERO:[0-9]+]], #0
+;CHECK: movi v[[ZERO:[0-9]+]].2s, #2
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.2s, v[[ZERO]].2s, v0.2s
-  %tmp3 = icmp ult <2 x i32> %A, zeroinitializer;
+  %tmp3 = icmp ult <2 x i32> %A, <i32 2, i32 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
 }
@@ -1093,9 +1095,9 @@
 
 define <4 x i32> @cmloz4xi32(<4 x i32> %A) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK: movi v[[ZERO:[0-9]+]].4s, #2
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.4s, v[[ZERO]].4s, v0.4s
-  %tmp3 = icmp ult <4 x i32> %A, zeroinitializer;
+  %tmp3 = icmp ult <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
 }
@@ -1103,9 +1105,10 @@
 
 define <2 x i64> @cmloz2xi64(<2 x i64> %A) {
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-;CHECK: movi v[[ZERO:[0-9]+]].2d, #0
+;CHECK: orr w[[TWO:[0-9]+]], wzr, #0x2
+;CHECK-NEXT: dup v[[ZERO:[0-9]+]].2d, x[[TWO]]
 ;CHECK-NEXT: cmhi {{v[0-9]+}}.2d, v[[ZERO]].2d, v0.2d
-  %tmp3 = icmp ult <2 x i64> %A, zeroinitializer;
+  %tmp3 = icmp ult <2 x i64> %A, <i64 2, i64 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
 }
Index: test/CodeGen/AArch64/neon-compare-instructions.ll
===================================================================
--- test/CodeGen/AArch64/neon-compare-instructions.ll
+++ test/CodeGen/AArch64/neon-compare-instructions.ll
@@ -1092,63 +1092,64 @@
 
 define <8 x i8> @cmhsz8xi8(<8 x i8> %A) {
 ; CHECK-LABEL: cmhsz8xi8:
-; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}}
+; CHECK: movi {{v[0-9]+}}.8b, #{{0x2|2}}
 ; CHECK-NEXT: cmhs {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %tmp3 = icmp uge <8 x i8> %A, zeroinitializer;
+  %tmp3 = icmp uge <8 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
   ret <8 x i8> %tmp4
 }
 
 define <16 x i8> @cmhsz16xi8(<16 x i8> %A) {
 ; CHECK-LABEL: cmhsz16xi8:
-; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK: movi {{v[0-9]+}}.16b, #{{0x2|2}}
 ; CHECK-NEXT: cmhs {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-  %tmp3 = icmp uge <16 x i8> %A, zeroinitializer;
+  %tmp3 = icmp uge <16 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
   ret <16 x i8> %tmp4
 }
 
 define <4 x i16> @cmhsz4xi16(<4 x i16> %A) {
 ; CHECK-LABEL: cmhsz4xi16:
-; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}}
+; CHECK: movi {{v[0-9]+}}.4h, #{{0x2|2}}
 ; CHECK-NEXT: cmhs {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-  %tmp3 = icmp uge <4 x i16> %A, zeroinitializer;
+  %tmp3 = icmp uge <4 x i16> %A, <i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
   ret <4 x i16> %tmp4
 }
 
 define <8 x i16> @cmhsz8xi16(<8 x i16> %A) {
 ; CHECK-LABEL: cmhsz8xi16:
-; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK: movi {{v[0-9]+}}.8h, #{{0x2|2}}
 ; CHECK-NEXT: cmhs {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-  %tmp3 = icmp uge <8 x i16> %A, zeroinitializer;
+  %tmp3 = icmp uge <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
   ret <8 x i16> %tmp4
 }
 
 define <2 x i32> @cmhsz2xi32(<2 x i32> %A) {
 ; CHECK-LABEL: cmhsz2xi32:
-; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}}
+; CHECK: movi {{v[0-9]+}}.2s, #{{0x2|2}}
 ; CHECK-NEXT: cmhs {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %tmp3 = icmp uge <2 x i32> %A, zeroinitializer;
+  %tmp3 = icmp uge <2 x i32> %A, <i32 2, i32 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
 }
 
 define <4 x i32> @cmhsz4xi32(<4 x i32> %A) {
 ; CHECK-LABEL: cmhsz4xi32:
-; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK: movi {{v[0-9]+}}.4s, #{{0x2|2}}
 ; CHECK-NEXT: cmhs {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-  %tmp3 = icmp uge <4 x i32> %A, zeroinitializer;
+  %tmp3 = icmp uge <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
 }
 
 define <2 x i64> @cmhsz2xi64(<2 x i64> %A) {
 ; CHECK-LABEL: cmhsz2xi64:
-; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK: orr w[[TWO:[0-9]+]], wzr, #0x2
+; CHECK-NEXT: dup {{v[0-9]+}}.2d, x[[TWO]]
 ; CHECK-NEXT: cmhs {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-  %tmp3 = icmp uge <2 x i64> %A, zeroinitializer;
+  %tmp3 = icmp uge <2 x i64> %A, <i64 2, i64 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
 }
@@ -1156,63 +1157,64 @@
 
 define <8 x i8> @cmhiz8xi8(<8 x i8> %A) {
 ; CHECK-LABEL: cmhiz8xi8:
-; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}}
+; CHECK: movi {{v[0-9]+}}.8b, #{{0x1|1}}
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.8b, {{v[0-9]+}}.8b, {{v[0-9]+}}.8b
-  %tmp3 = icmp ugt <8 x i8> %A, zeroinitializer;
+  %tmp3 = icmp ugt <8 x i8> %A, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
   ret <8 x i8> %tmp4
 }
 
 define <16 x i8> @cmhiz16xi8(<16 x i8> %A) {
 ; CHECK-LABEL: cmhiz16xi8:
-; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK: movi {{v[0-9]+}}.16b, #{{0x1|1}}
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.16b, {{v[0-9]+}}.16b, {{v[0-9]+}}.16b
-  %tmp3 = icmp ugt <16 x i8> %A, zeroinitializer;
+  %tmp3 = icmp ugt <16 x i8> %A, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
   ret <16 x i8> %tmp4
 }
 
 define <4 x i16> @cmhiz4xi16(<4 x i16> %A) {
 ; CHECK-LABEL: cmhiz4xi16:
-; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}}
+; CHECK: movi {{v[0-9]+}}.4h, #{{0x1|1}}
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
-  %tmp3 = icmp ugt <4 x i16> %A, zeroinitializer;
+  %tmp3 = icmp ugt <4 x i16> %A, <i16 1, i16 1, i16 1, i16 1>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
   ret <4 x i16> %tmp4
 }
 
 define <8 x i16> @cmhiz8xi16(<8 x i16> %A) {
 ; CHECK-LABEL: cmhiz8xi16:
-; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK: movi {{v[0-9]+}}.8h, #{{0x1|1}}
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
-  %tmp3 = icmp ugt <8 x i16> %A, zeroinitializer;
+  %tmp3 = icmp ugt <8 x i16> %A, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
   ret <8 x i16> %tmp4
 }
 
 define <2 x i32> @cmhiz2xi32(<2 x i32> %A) {
 ; CHECK-LABEL: cmhiz2xi32:
-; CHECK: movi {{v[0-9]+.8b|d[0-9]+}}, #{{0x0|0}}
+; CHECK: movi {{v[0-9]+}}.2s, #{{0x1|1}}
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
-  %tmp3 = icmp ugt <2 x i32> %A, zeroinitializer;
+  %tmp3 = icmp ugt <2 x i32> %A, <i32 1, i32 1>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
 }
 
 define <4 x i32> @cmhiz4xi32(<4 x i32> %A) {
 ; CHECK-LABEL: cmhiz4xi32:
-; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK: movi {{v[0-9]+}}.4s, #{{0x1|1}}
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
-  %tmp3 = icmp ugt <4 x i32> %A, zeroinitializer;
+  %tmp3 = icmp ugt <4 x i32> %A, <i32 1, i32 1, i32 1, i32 1>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
 }
 
 define <2 x i64> @cmhiz2xi64(<2 x i64> %A) {
 ; CHECK-LABEL: cmhiz2xi64:
-; CHECK: movi {{v[0-9]+.(16b|2d)}}, #{{0x0|0}}
+; CHECK: orr w[[ONE:[0-9]+]], wzr, #{{0x1|1}}
+; CHECK-NEXT: dup {{v[0-9]+}}.2d, x[[ONE]]
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
-  %tmp3 = icmp ugt <2 x i64> %A, zeroinitializer;
+  %tmp3 = icmp ugt <2 x i64> %A, <i64 1, i64 1>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
 }
@@ -1298,9 +1300,9 @@
 ; CHECK-LABEL: cmloz8xi8:
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}}
+; CHECK: movi v1.8b, #{{0x2|2}}
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.8b, v1.8b, {{v[0-9]+}}.8b
-  %tmp3 = icmp ult <8 x i8> %A, zeroinitializer;
+  %tmp3 = icmp ult <8 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i8>
   ret <8 x i8> %tmp4
 }
@@ -1309,9 +1311,9 @@
 ; CHECK-LABEL: cmloz16xi8:
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
+; CHECK: movi v1.16b, #{{0x2|2}}
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.16b, v1.16b, v0.16b
-  %tmp3 = icmp ult <16 x i8> %A, zeroinitializer;
+  %tmp3 = icmp ult <16 x i8> %A, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %tmp4 = sext <16 x i1> %tmp3 to <16 x i8>
   ret <16 x i8> %tmp4
 }
@@ -1320,9 +1322,9 @@
 ; CHECK-LABEL: cmloz4xi16:
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}}
+; CHECK: movi v1.4h, #{{0x2|2}}
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.4h, v1.4h, v0.4h
-  %tmp3 = icmp ult <4 x i16> %A, zeroinitializer;
+  %tmp3 = icmp ult <4 x i16> %A, <i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i16>
   ret <4 x i16> %tmp4
 }
@@ -1331,9 +1333,9 @@
 ; CHECK-LABEL: cmloz8xi16:
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
+; CHECK: movi v1.8h, #{{0x2|2}}
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.8h, v1.8h, v0.8h
-  %tmp3 = icmp ult <8 x i16> %A, zeroinitializer;
+  %tmp3 = icmp ult <8 x i16> %A, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %tmp4 = sext <8 x i1> %tmp3 to <8 x i16>
   ret <8 x i16> %tmp4
 }
@@ -1342,9 +1344,9 @@
 ; CHECK-LABEL: cmloz2xi32:
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-; CHECK: movi {{v1.8b|d1}}, #{{0x0|0}}
+; CHECK: movi v1.2s, #{{0x2|2}}
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.2s, v1.2s, v0.2s
-  %tmp3 = icmp ult <2 x i32> %A, zeroinitializer;
+  %tmp3 = icmp ult <2 x i32> %A, <i32 2, i32 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i32>
   ret <2 x i32> %tmp4
 }
@@ -1353,9 +1355,9 @@
 ; CHECK-LABEL: cmloz4xi32:
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
+; CHECK: movi v1.4s, #{{0x2|2}}
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.4s, v1.4s, v0.4s
-  %tmp3 = icmp ult <4 x i32> %A, zeroinitializer;
+  %tmp3 = icmp ult <4 x i32> %A, <i32 2, i32 2, i32 2, i32 2>
   %tmp4 = sext <4 x i1> %tmp3 to <4 x i32>
   ret <4 x i32> %tmp4
 }
@@ -1364,9 +1366,10 @@
 ; CHECK-LABEL: cmloz2xi64:
 ; Using registers other than v0, v1 are possible, but would be odd.
 ; LO implemented as HI, so check reversed operands.
-; CHECK: movi {{v1.16b|v1.2d}}, #{{0x0|0}}
+; CHECK: orr w[[TWO:[0-9]+]], wzr, #{{0x2|2}}
+; CHECK-NEXT: dup v1.2d, x[[TWO]]
 ; CHECK-NEXT: cmhi {{v[0-9]+}}.2d, v1.2d, v0.2d
-  %tmp3 = icmp ult <2 x i64> %A, zeroinitializer;
+  %tmp3 = icmp ult <2 x i64> %A, <i64 2, i64 2>
   %tmp4 = sext <2 x i1> %tmp3 to <2 x i64>
   ret <2 x i64> %tmp4
 }
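The Mips MSA immediates in the next file move from 1 to 2 for a related reason: once splats are matched, an unsigned less-than against splat(1) canonicalizes to an equality with zero (the longstanding "setult X, 1 --> seteq X, 0" pattern, now reachable for vectors), so a test using 1 would no longer produce clti_u at all. An illustrative sketch (hypothetical function name; the canonicalization is inferred from the updated CHECK lines):

define <8 x i16> @ult_one(<8 x i16> %x) {
  ; "X <u 1" is "X == 0"; with splat handling this is expected to become
  ; a compare-equal against zero rather than an unsigned less-than.
  %cmp = icmp ult <8 x i16> %x, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %ext = sext <8 x i1> %cmp to <8 x i16>
  ret <8 x i16> %ext
}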
Index: test/CodeGen/Mips/msa/compare.ll
===================================================================
--- test/CodeGen/Mips/msa/compare.ll
+++ test/CodeGen/Mips/msa/compare.ll
@@ -671,9 +671,9 @@
 
   %1 = load <16 x i8>, <16 x i8>* %a
   ; CHECK-DAG: ld.b [[R1:\$w[0-9]+]], 0($5)
-  %2 = icmp ult <16 x i8> %1, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %2 = icmp ult <16 x i8> %1, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
   %3 = sext <16 x i1> %2 to <16 x i8>
-  ; CHECK-DAG: clti_u.b [[R3:\$w[0-9]+]], [[R1]], 1
+  ; CHECK-DAG: clti_u.b [[R3:\$w[0-9]+]], [[R1]], 2
   store <16 x i8> %3, <16 x i8>* %c
   ; CHECK-DAG: st.b [[R3]], 0($4)
@@ -686,9 +686,9 @@
 
   %1 = load <8 x i16>, <8 x i16>* %a
   ; CHECK-DAG: ld.h [[R1:\$w[0-9]+]], 0($5)
-  %2 = icmp ult <8 x i16> %1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %2 = icmp ult <8 x i16> %1, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
   %3 = sext <8 x i1> %2 to <8 x i16>
-  ; CHECK-DAG: clti_u.h [[R3:\$w[0-9]+]], [[R1]], 1
+  ; CHECK-DAG: clti_u.h [[R3:\$w[0-9]+]], [[R1]], 2
   store <8 x i16> %3, <8 x i16>* %c
   ; CHECK-DAG: st.h [[R3]], 0($4)
@@ -701,9 +701,9 @@
 
   %1 = load <4 x i32>, <4 x i32>* %a
   ; CHECK-DAG: ld.w [[R1:\$w[0-9]+]], 0($5)
-  %2 = icmp ult <4 x i32> %1, <i32 1, i32 1, i32 1, i32 1>
+  %2 = icmp ult <4 x i32> %1, <i32 2, i32 2, i32 2, i32 2>
   %3 = sext <4 x i1> %2 to <4 x i32>
-  ; CHECK-DAG: clti_u.w [[R3:\$w[0-9]+]], [[R1]], 1
+  ; CHECK-DAG: clti_u.w [[R3:\$w[0-9]+]], [[R1]], 2
   store <4 x i32> %3, <4 x i32>* %c
   ; CHECK-DAG: st.w [[R3]], 0($4)
@@ -716,9 +716,9 @@
 
   %1 = load <2 x i64>, <2 x i64>* %a
   ; CHECK-DAG: ld.d [[R1:\$w[0-9]+]], 0($5)
-  %2 = icmp ult <2 x i64> %1, <i64 1, i64 1>
+  %2 = icmp ult <2 x i64> %1, <i64 2, i64 2>
   %3 = sext <2 x i1> %2 to <2 x i64>
-  ; CHECK-DAG: clti_u.d [[R3:\$w[0-9]+]], [[R1]], 1
+  ; CHECK-DAG: clti_u.d [[R3:\$w[0-9]+]], [[R1]], 2
   store <2 x i64> %3, <2 x i64>* %c
   ; CHECK-DAG: st.d [[R3]], 0($4)
Index: test/CodeGen/X86/avx512-insert-extract.ll
===================================================================
--- test/CodeGen/X86/avx512-insert-extract.ll
+++ test/CodeGen/X86/avx512-insert-extract.ll
@@ -1707,9 +1707,9 @@
 ; KNL-NEXT: andq $-32, %rsp
 ; KNL-NEXT: subq $64, %rsp
 ; KNL-NEXT: ## kill: def $esi killed $esi def $rsi
-; KNL-NEXT: vmovdqa {{.*#+}} ymm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; KNL-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0
+; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0
+; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; KNL-NEXT: andl $31, %esi
 ; KNL-NEXT: testb %dil, %dil
 ; KNL-NEXT: vmovdqa %ymm0, (%rsp)
@@ -1739,8 +1739,7 @@
 ; SKX-NEXT: andq $-32, %rsp
 ; SKX-NEXT: subq $64, %rsp
 ; SKX-NEXT: ## kill: def $esi killed $esi def $rsi
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT: vpcmpnleub %ymm1, %ymm0, %k0
+; SKX-NEXT: vptestmb %ymm0, %ymm0, %k0
 ; SKX-NEXT: andl $31, %esi
 ; SKX-NEXT: testb %dil, %dil
 ; SKX-NEXT: vpmovm2b %k0, %ymm0
@@ -1771,11 +1770,11 @@
 ; KNL-NEXT: andq $-64, %rsp
 ; KNL-NEXT: subq $128, %rsp
 ; KNL-NEXT: ## kill: def $esi killed $esi def $rsi
-; KNL-NEXT: vmovdqa {{.*#+}} ymm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; KNL-NEXT: vpxor %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpcmpgtb %ymm2, %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm2, %ymm1, %ymm1
-; KNL-NEXT: vpcmpgtb %ymm2, %ymm1, %ymm1
+; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
+; KNL-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm0
+; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vpcmpeqb %ymm2, %ymm1, %ymm1
+; KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
 ; KNL-NEXT: andl $63, %esi
 ; KNL-NEXT: testb %dil, %dil
 ; KNL-NEXT: vmovdqa %ymm1, {{[0-9]+}}(%rsp)
@@ -1822,8 +1821,7 @@
 ; SKX-NEXT: andq $-64, %rsp
 ; SKX-NEXT: subq $128, %rsp
 ; SKX-NEXT: ## kill: def $esi killed $esi def $rsi
-; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; SKX-NEXT: vpcmpnleub %zmm1, %zmm0, %k0
+; SKX-NEXT: vptestmb %zmm0, %zmm0, %k0
 ; SKX-NEXT: andl $63, %esi
 ; SKX-NEXT: testb %dil, %dil
 ; SKX-NEXT: vpmovm2b %k0, %zmm0
@@ -1952,14 +1950,13 @@
 ; KNL-NEXT: vpinsrb $14, 208(%rbp), %xmm3, %xmm3
 ; KNL-NEXT: vpinsrb $15, 216(%rbp), %xmm3, %xmm3
 ; KNL-NEXT: vinserti128 $1, %xmm3, %ymm2, %ymm2
-; KNL-NEXT: vmovdqa {{.*#+}} ymm3 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; KNL-NEXT: vpxor %ymm3, %ymm2, %ymm2
-; KNL-NEXT: vpcmpgtb %ymm3, %ymm2, %ymm2
-; KNL-NEXT: vpxor %ymm3, %ymm1, %ymm1
-; KNL-NEXT: vpcmpgtb %ymm3, %ymm1, %ymm1
-; KNL-NEXT: vpxor %ymm3, %ymm0, %ymm0
-; KNL-NEXT: vpcmpgtb %ymm3, %ymm0, %ymm0
 ; KNL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; KNL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
+; KNL-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
+; KNL-NEXT: vpcmpeqb %ymm3, %ymm1, %ymm1
+; KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
+; KNL-NEXT: vpcmpeqb %ymm3, %ymm0, %ymm0
+; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
 ; KNL-NEXT: movl 744(%rbp), %eax
 ; KNL-NEXT: andl $127, %eax
 ; KNL-NEXT: cmpb $0, 736(%rbp)
@@ -2134,9 +2131,8 @@
 ; SKX-NEXT: vpinsrb $14, 720(%rbp), %xmm2, %xmm2
 ; SKX-NEXT: vpinsrb $15, 728(%rbp), %xmm2, %xmm2
 ; SKX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT: vpcmpnleub %zmm2, %zmm0, %k0
-; SKX-NEXT: vpcmpnleub %zmm2, %zmm1, %k1
+; SKX-NEXT: vptestmb %zmm0, %zmm0, %k0
+; SKX-NEXT: vptestmb %zmm1, %zmm1, %k1
 ; SKX-NEXT: movl 744(%rbp), %eax
 ; SKX-NEXT: andl $127, %eax
 ; SKX-NEXT: cmpb $0, 736(%rbp)
@@ -2173,15 +2169,15 @@
 ; KNL-NEXT: andq $-128, %rsp
 ; KNL-NEXT: subq $256, %rsp ## imm = 0x100
 ; KNL-NEXT: ## kill: def $esi killed $esi def $rsi
-; KNL-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
-; KNL-NEXT: vpxor %ymm4, %ymm0, %ymm0
-; KNL-NEXT: vpcmpgtb %ymm4, %ymm0, %ymm0
-; KNL-NEXT: vpxor %ymm4, %ymm1, %ymm1
-; KNL-NEXT: vpcmpgtb %ymm4, %ymm1, %ymm1
-; KNL-NEXT: vpxor %ymm4, %ymm2, %ymm2
-; KNL-NEXT: vpcmpgtb %ymm4, %ymm2, %ymm2
-; KNL-NEXT: vpxor %ymm4, %ymm3, %ymm3
-; KNL-NEXT: vpcmpgtb %ymm4, %ymm3, %ymm3
+; KNL-NEXT: vpxor %xmm4, %xmm4, %xmm4
+; KNL-NEXT: vpcmpeqb %ymm4, %ymm0, %ymm0
+; KNL-NEXT: vpternlogq $15, %zmm0, %zmm0, %zmm0
+; KNL-NEXT: vpcmpeqb %ymm4, %ymm1, %ymm1
+; KNL-NEXT: vpternlogq $15, %zmm1, %zmm1, %zmm1
+; KNL-NEXT: vpcmpeqb %ymm4, %ymm2, %ymm2
+; KNL-NEXT: vpternlogq $15, %zmm2, %zmm2, %zmm2
+; KNL-NEXT: vpcmpeqb %ymm4, %ymm3, %ymm3
+; KNL-NEXT: vpternlogq $15, %zmm3, %zmm3, %zmm3
 ; KNL-NEXT: andl $127, %esi
 ; KNL-NEXT: testb %dil, %dil
 ; KNL-NEXT: vmovdqa %ymm3, {{[0-9]+}}(%rsp)
@@ -2256,9 +2252,8 @@
 ; SKX-NEXT: andq $-128, %rsp
 ; SKX-NEXT: subq $256, %rsp ## imm = 0x100
 ; SKX-NEXT: ## kill: def $esi killed $esi def $rsi
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; SKX-NEXT: vpcmpnleub %zmm2, %zmm0, %k0
-; SKX-NEXT: vpcmpnleub %zmm2, %zmm1, %k1
+; SKX-NEXT: vptestmb %zmm0, %zmm0, %k0
+; SKX-NEXT: vptestmb %zmm1, %zmm1, %k1
 ; SKX-NEXT: andl $127, %esi
 ; SKX-NEXT: testb %dil, %dil
 ; SKX-NEXT: vpmovm2b %k1, %zmm0
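The X86 diffs in avx512-insert-extract.ll all come from one canonicalization shown above: setgt X, Min --> setne X, Min. These tests compare a vXi8 vector against zero with ugt, and "X >u 0" is now treated as "X != 0", so SKX can use a single vptestmb while KNL uses a compare-equal-to-zero plus a vpternlogq NOT of the result, replacing the old bias-and-signed-compare sequence (vpxor with a 0x80 splat, then vpcmpgtb). A sketch of the shape being tested (hypothetical function name):

define <32 x i8> @ugt_zero(<32 x i8> %x) {
  ; "X >u 0" canonicalizes to "X != 0" once splat constants are matched.
  %cmp = icmp ugt <32 x i8> %x, zeroinitializer
  %ext = sext <32 x i1> %cmp to <32 x i8>
  ret <32 x i8> %ext
}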
Index: test/CodeGen/X86/avx512-mask-op.ll
===================================================================
--- test/CodeGen/X86/avx512-mask-op.ll
+++ test/CodeGen/X86/avx512-mask-op.ll
@@ -626,13 +626,13 @@
 ; KNL-LABEL: test8:
 ; KNL: ## %bb.0:
 ; KNL-NEXT: cmpl %esi, %edi
-; KNL-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; KNL-NEXT: jg LBB17_1
 ; KNL-NEXT: ## %bb.2:
-; KNL-NEXT: vpcmpltud %zmm2, %zmm1, %k1
+; KNL-NEXT: kxorw %k0, %k0, %k1
 ; KNL-NEXT: jmp LBB17_3
 ; KNL-NEXT: LBB17_1:
-; KNL-NEXT: vpcmpgtd %zmm2, %zmm0, %k1
+; KNL-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
 ; KNL-NEXT: LBB17_3:
 ; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
 ; KNL-NEXT: vpmovdb %zmm0, %xmm0
@@ -642,15 +642,15 @@
 ; SKX-LABEL: test8:
 ; SKX: ## %bb.0:
 ; SKX-NEXT: cmpl %esi, %edi
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; SKX-NEXT: jg LBB17_1
 ; SKX-NEXT: ## %bb.2:
-; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0
+; SKX-NEXT: kxorw %k0, %k0, %k0
 ; SKX-NEXT: vpmovm2b %k0, %xmm0
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
 ; SKX-NEXT: LBB17_1:
-; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
+; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
 ; SKX-NEXT: vpmovm2b %k0, %xmm0
 ; SKX-NEXT: vzeroupper
 ; SKX-NEXT: retq
@@ -658,16 +658,16 @@
 ; AVX512BW-LABEL: test8:
 ; AVX512BW: ## %bb.0:
 ; AVX512BW-NEXT: cmpl %esi, %edi
-; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512BW-NEXT: jg LBB17_1
 ; AVX512BW-NEXT: ## %bb.2:
-; AVX512BW-NEXT: vpcmpltud %zmm2, %zmm1, %k0
+; AVX512BW-NEXT: kxorw %k0, %k0, %k0
 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512BW-NEXT: vzeroupper
 ; AVX512BW-NEXT: retq
 ; AVX512BW-NEXT: LBB17_1:
-; AVX512BW-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
+; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512BW-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
 ; AVX512BW-NEXT: vpmovm2b %k0, %zmm0
 ; AVX512BW-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
 ; AVX512BW-NEXT: vzeroupper
@@ -676,13 +676,13 @@
 ; AVX512DQ-LABEL: test8:
 ; AVX512DQ: ## %bb.0:
 ; AVX512DQ-NEXT: cmpl %esi, %edi
-; AVX512DQ-NEXT: vpxor %xmm2, %xmm2, %xmm2
 ; AVX512DQ-NEXT: jg LBB17_1
 ; AVX512DQ-NEXT: ## %bb.2:
-; AVX512DQ-NEXT: vpcmpltud %zmm2, %zmm1, %k0
+; AVX512DQ-NEXT: kxorw %k0, %k0, %k0
 ; AVX512DQ-NEXT: jmp LBB17_3
 ; AVX512DQ-NEXT: LBB17_1:
-; AVX512DQ-NEXT: vpcmpgtd %zmm2, %zmm0, %k0
+; AVX512DQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; AVX512DQ-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
 ; AVX512DQ-NEXT: LBB17_3:
 ; AVX512DQ-NEXT: vpmovm2d %k0, %zmm0
 ; AVX512DQ-NEXT: vpmovdb %zmm0, %xmm0
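test8 above and vcmp_test8 below exercise the "X < MIN --> false" path: the branch arm that compared unsigned-less-than against zero now folds to a constant-false mask, materialized with kxorw instead of a vector compare, which in turn lets the zero vector be created only on the branch that still needs it. A sketch of the folded arm (hypothetical function name):

define <16 x i32> @ult_zero(<16 x i32> %x) {
  ; "X <u 0" is false in every lane; the compare folds to a zero mask
  ; (kxorw %k0, %k0, %k0 at the AVX-512 mask-register level).
  %cmp = icmp ult <16 x i32> %x, zeroinitializer
  %ext = sext <16 x i1> %cmp to <16 x i32>
  ret <16 x i32> %ext
}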
Index: test/CodeGen/X86/avx512-schedule.ll
===================================================================
--- test/CodeGen/X86/avx512-schedule.ll
+++ test/CodeGen/X86/avx512-schedule.ll
@@ -7058,32 +7058,32 @@
 define <16 x i8> @vcmp_test8(<16 x i32>%a, <16 x i32>%b, i32 %a1, i32 %b1) {
 ; GENERIC-LABEL: vcmp_test8:
 ; GENERIC: # %bb.0:
-; GENERIC-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; GENERIC-NEXT: cmpl %esi, %edi # sched: [1:0.33]
 ; GENERIC-NEXT: jg .LBB386_1 # sched: [1:1.00]
 ; GENERIC-NEXT: # %bb.2:
-; GENERIC-NEXT: vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: kxorw %k0, %k0, %k0 # sched: [1:1.00]
 ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ; GENERIC-NEXT: .LBB386_1:
-; GENERIC-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00]
+; GENERIC-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
+; GENERIC-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00]
 ; GENERIC-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.33]
 ; GENERIC-NEXT: vzeroupper # sched: [100:0.33]
 ; GENERIC-NEXT: retq # sched: [1:1.00]
 ;
 ; SKX-LABEL: vcmp_test8:
 ; SKX: # %bb.0:
-; SKX-NEXT: vpxor %xmm2, %xmm2, %xmm2 # sched: [1:0.33]
 ; SKX-NEXT: cmpl %esi, %edi # sched: [1:0.25]
 ; SKX-NEXT: jg .LBB386_1 # sched: [1:0.50]
 ; SKX-NEXT: # %bb.2:
-; SKX-NEXT: vpcmpltud %zmm2, %zmm1, %k0 # sched: [3:1.00]
+; SKX-NEXT: kxorw %k0, %k0, %k0 # sched: [1:1.00]
 ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
 ; SKX-NEXT: retq # sched: [7:1.00]
 ; SKX-NEXT: .LBB386_1:
-; SKX-NEXT: vpcmpgtd %zmm2, %zmm0, %k0 # sched: [3:1.00]
+; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 # sched: [1:0.33]
+; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 # sched: [3:1.00]
 ; SKX-NEXT: vpmovm2b %k0, %xmm0 # sched: [1:0.25]
 ; SKX-NEXT: vzeroupper # sched: [4:1.00]
 ; SKX-NEXT: retq # sched: [7:1.00]