Index: llvm/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.h +++ llvm/lib/Target/ARM/ARMISelLowering.h @@ -129,20 +129,8 @@ PREDICATE_CAST, // Predicate cast for MVE i1 types - VCEQ, // Vector compare equal. - VCEQZ, // Vector compare equal to zero. - VCNE, // Vector compare not equal (MVE) - VCNEZ, // Vector compare not equal to zero (MVE) - VCGE, // Vector compare greater than or equal. - VCGEZ, // Vector compare greater than or equal to zero. - VCLE, // Vector compare less than or equal. - VCLEZ, // Vector compare less than or equal to zero. - VCGEU, // Vector compare unsigned greater than or equal. - VCGT, // Vector compare greater than. - VCGTZ, // Vector compare greater than zero. - VCLT, // Vector compare less than. - VCLTZ, // Vector compare less than zero. - VCGTU, // Vector compare unsigned greater than. + VCMP, // Vector compare. + VCMPZ, // Vector compare to zero. VTST, // Vector test bits. 
// Vector shift by vector Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -1516,20 +1516,8 @@ case ARMISD::WIN__CHKSTK: return "ARMISD::WIN__CHKSTK"; case ARMISD::WIN__DBZCHK: return "ARMISD::WIN__DBZCHK"; - case ARMISD::VCEQ: return "ARMISD::VCEQ"; - case ARMISD::VCEQZ: return "ARMISD::VCEQZ"; - case ARMISD::VCNE: return "ARMISD::VCNE"; - case ARMISD::VCNEZ: return "ARMISD::VCNEZ"; - case ARMISD::VCGE: return "ARMISD::VCGE"; - case ARMISD::VCGEZ: return "ARMISD::VCGEZ"; - case ARMISD::VCLE: return "ARMISD::VCLE"; - case ARMISD::VCLEZ: return "ARMISD::VCLEZ"; - case ARMISD::VCGEU: return "ARMISD::VCGEU"; - case ARMISD::VCGT: return "ARMISD::VCGT"; - case ARMISD::VCGTZ: return "ARMISD::VCGTZ"; - case ARMISD::VCLT: return "ARMISD::VCLT"; - case ARMISD::VCLTZ: return "ARMISD::VCLTZ"; - case ARMISD::VCGTU: return "ARMISD::VCGTU"; + case ARMISD::VCMP: return "ARMISD::VCMP"; + case ARMISD::VCMPZ: return "ARMISD::VCMPZ"; case ARMISD::VTST: return "ARMISD::VTST"; case ARMISD::VSHLs: return "ARMISD::VSHLs"; @@ -5871,10 +5859,9 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) { - SDValue TmpOp0, TmpOp1; bool Invert = false; bool Swap = false; - unsigned Opc = 0; + unsigned Opc = ARMCC::AL; SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -5930,44 +5917,48 @@ case ISD::SETUNE: case ISD::SETNE: if (ST->hasMVEFloatOps()) { - Opc = ARMISD::VCNE; break; + Opc = ARMCC::NE; break; } else { Invert = true; LLVM_FALLTHROUGH; } case ISD::SETOEQ: - case ISD::SETEQ: Opc = ARMISD::VCEQ; break; + case ISD::SETEQ: Opc = ARMCC::EQ; break; case ISD::SETOLT: case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH; case ISD::SETOGT: - case ISD::SETGT: Opc = ARMISD::VCGT; break; + case ISD::SETGT: Opc = ARMCC::GT; break; case ISD::SETOLE: case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH; case 
ISD::SETOGE: - case ISD::SETGE: Opc = ARMISD::VCGE; break; + case ISD::SETGE: Opc = ARMCC::GE; break; case ISD::SETUGE: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETULE: Invert = true; Opc = ARMISD::VCGT; break; + case ISD::SETULE: Invert = true; Opc = ARMCC::GT; break; case ISD::SETUGT: Swap = true; LLVM_FALLTHROUGH; - case ISD::SETULT: Invert = true; Opc = ARMISD::VCGE; break; + case ISD::SETULT: Invert = true; Opc = ARMCC::GE; break; case ISD::SETUEQ: Invert = true; LLVM_FALLTHROUGH; - case ISD::SETONE: + case ISD::SETONE: { // Expand this to (OLT | OGT). - TmpOp0 = Op0; - TmpOp1 = Op1; - Opc = ISD::OR; - Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); - Op1 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp0, TmpOp1); - break; - case ISD::SETUO: - Invert = true; - LLVM_FALLTHROUGH; - case ISD::SETO: + SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0, + DAG.getConstant(ARMCC::GT, dl, MVT::i32)); + SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1, + DAG.getConstant(ARMCC::GT, dl, MVT::i32)); + SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1); + if (Invert) + Result = DAG.getNOT(dl, Result, VT); + return Result; + } + case ISD::SETUO: Invert = true; LLVM_FALLTHROUGH; + case ISD::SETO: { // Expand this to (OLT | OGE). - TmpOp0 = Op0; - TmpOp1 = Op1; - Opc = ISD::OR; - Op0 = DAG.getNode(ARMISD::VCGT, dl, CmpVT, TmpOp1, TmpOp0); - Op1 = DAG.getNode(ARMISD::VCGE, dl, CmpVT, TmpOp0, TmpOp1); - break; + SDValue TmpOp0 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op1, Op0, + DAG.getConstant(ARMCC::GT, dl, MVT::i32)); + SDValue TmpOp1 = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1, + DAG.getConstant(ARMCC::GE, dl, MVT::i32)); + SDValue Result = DAG.getNode(ISD::OR, dl, CmpVT, TmpOp0, TmpOp1); + if (Invert) + Result = DAG.getNOT(dl, Result, VT); + return Result; + } } } else { // Integer comparisons. 
@@ -5975,23 +5966,23 @@
     default: llvm_unreachable("Illegal integer comparison");
     case ISD::SETNE:
       if (ST->hasMVEIntegerOps()) {
-        Opc = ARMISD::VCNE; break;
+        Opc = ARMCC::NE; break;
       } else {
         Invert = true; LLVM_FALLTHROUGH;
       }
-    case ISD::SETEQ: Opc = ARMISD::VCEQ; break;
+    case ISD::SETEQ: Opc = ARMCC::EQ; break;
     case ISD::SETLT: Swap = true; LLVM_FALLTHROUGH;
-    case ISD::SETGT: Opc = ARMISD::VCGT; break;
+    case ISD::SETGT: Opc = ARMCC::GT; break;
     case ISD::SETLE: Swap = true; LLVM_FALLTHROUGH;
-    case ISD::SETGE: Opc = ARMISD::VCGE; break;
+    case ISD::SETGE: Opc = ARMCC::GE; break;
     case ISD::SETULT: Swap = true; LLVM_FALLTHROUGH;
-    case ISD::SETUGT: Opc = ARMISD::VCGTU; break;
+    case ISD::SETUGT: Opc = ARMCC::HI; break;
     case ISD::SETULE: Swap = true; LLVM_FALLTHROUGH;
-    case ISD::SETUGE: Opc = ARMISD::VCGEU; break;
+    case ISD::SETUGE: Opc = ARMCC::HS; break;
     }
 
     // Detect VTST (Vector Test Bits) = icmp ne (and (op0, op1), zero).
-    if (ST->hasNEON() && Opc == ARMISD::VCEQ) {
+    if (ST->hasNEON() && Opc == ARMCC::EQ) {
       SDValue AndOp;
       if (ISD::isBuildVectorAllZeros(Op1.getNode()))
         AndOp = Op0;
@@ -6003,10 +5994,12 @@
         AndOp = AndOp.getOperand(0);
 
       if (AndOp.getNode() && AndOp.getOpcode() == ISD::AND) {
-        Opc = ARMISD::VTST;
         Op0 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(0));
         Op1 = DAG.getNode(ISD::BITCAST, dl, CmpVT, AndOp.getOperand(1));
-        Invert = !Invert;
+        SDValue Result = DAG.getNode(ARMISD::VTST, dl, CmpVT, Op0, Op1);
+        if (!Invert)
+          Result = DAG.getNOT(dl, Result, VT);
+        return Result;
       }
     }
   }
@@ -6020,34 +6013,39 @@
   if (ISD::isBuildVectorAllZeros(Op1.getNode()))
     SingleOp = Op0;
   else if (ISD::isBuildVectorAllZeros(Op0.getNode())) {
-    if (Opc == ARMISD::VCGE)
-      Opc = ARMISD::VCLEZ;
-    else if (Opc == ARMISD::VCGT)
-      Opc = ARMISD::VCLTZ;
-    SingleOp = Op1;
+    // The zero is on the left, so folding to "Op1 <cc> 0" swaps the operands
+    // and the condition has to be mirrored. EQ/NE are symmetric and GE/GT
+    // mirror to LE/LT. HS/HI would mirror to LS/LO, which have no
+    // compare-against-zero form here, so leave SingleOp unset and keep the
+    // two-operand compare for them.
+    switch (Opc) {
+    case ARMCC::GE:
+      Opc = ARMCC::LE;
+      SingleOp = Op1;
+      break;
+    case ARMCC::GT:
+      Opc = ARMCC::LT;
+      SingleOp = Op1;
+      break;
+    case ARMCC::EQ:
+    case ARMCC::NE:
+      SingleOp = Op1;
+      break;
+    default:
+      break;
+    }
   }
 
   SDValue Result;
-  if (SingleOp.getNode()) {
-    switch (Opc) {
-    case ARMISD::VCNE:
-      assert(ST->hasMVEIntegerOps() && "Unexpected DAG node");
-      Result = DAG.getNode(ARMISD::VCNEZ, dl, CmpVT, SingleOp); break;
-    case ARMISD::VCEQ:
-      Result = DAG.getNode(ARMISD::VCEQZ, dl, CmpVT, SingleOp); break;
-    case ARMISD::VCGE:
-      Result = DAG.getNode(ARMISD::VCGEZ, dl, CmpVT, SingleOp); break;
-    case ARMISD::VCLEZ:
-      Result = DAG.getNode(ARMISD::VCLEZ, dl, CmpVT, SingleOp); break;
-    case ARMISD::VCGT:
-      Result = DAG.getNode(ARMISD::VCGTZ, dl, CmpVT, SingleOp); break;
-    case ARMISD::VCLTZ:
-      Result = DAG.getNode(ARMISD::VCLTZ, dl, CmpVT, SingleOp); break;
-    default:
-      Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
-    }
+  // Unsigned compare-with-zero (HS/HI) is only selectable for MVE; without
+  // MVE fall back to the two-operand compare, as the old "default:" case did.
+  bool UnsignedCC = Opc == ARMCC::HS || Opc == ARMCC::HI;
+  if (SingleOp.getNode() && (!UnsignedCC || ST->hasMVEIntegerOps())) {
+    Result = DAG.getNode(ARMISD::VCMPZ, dl, CmpVT, SingleOp,
+                         DAG.getConstant(Opc, dl, MVT::i32));
   } else {
-    Result = DAG.getNode(Opc, dl, CmpVT, Op0, Op1);
+    Result = DAG.getNode(ARMISD::VCMP, dl, CmpVT, Op0, Op1,
+                         DAG.getConstant(Opc, dl, MVT::i32));
   }
 
   Result = DAG.getSExtOrTrunc(Result, dl, VT);
@@ -7477,7 +7475,8 @@
 
   // Now return the result of comparing the shuffled vector with zero,
   // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
-  return DAG.getNode(ARMISD::VCNEZ, dl, VT, Shuffled);
+  return DAG.getNode(ARMISD::VCMPZ, dl, VT, Shuffled,
+                     DAG.getConstant(ARMCC::NE, dl, MVT::i32));
 }
 
 static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG,
@@ -7818,7 +7817,8 @@
 
   // Now return the result of comparing the subvector with zero,
   // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
-  return DAG.getNode(ARMISD::VCNEZ, dl, VT, ConVec);
+  return DAG.getNode(ARMISD::VCMPZ, dl, VT, ConVec,
+                     DAG.getConstant(ARMCC::NE, dl, MVT::i32));
 }
 
 static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG,
@@ -7893,7 +7893,8 @@
 
   // Now return the result of comparing the subvector with zero,
   // which will generate a real predicate, i.e. v4i1, v8i1 or v16i1.
- return DAG.getNode(ARMISD::VCNEZ, dl, VT, SubVec); + return DAG.getNode(ARMISD::VCMPZ, dl, VT, SubVec, + DAG.getConstant(ARMCC::NE, dl, MVT::i32)); } /// isExtendedBUILD_VECTOR - Check if N is a constant BUILD_VECTOR where each @@ -11828,52 +11810,68 @@ return SDValue(); } +static bool isValidMVECond(unsigned CC) { + switch (CC) { + case ARMCC::EQ: + case ARMCC::NE: + case ARMCC::LE: + case ARMCC::GT: + case ARMCC::GE: + case ARMCC::LT: + case ARMCC::HS: + case ARMCC::HI: + return true; + default: + return false; + }; +} + static SDValue PerformORCombine_i1(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain // together with predicates - struct Codes { - unsigned Opcode; - unsigned Opposite; - } InvertCodes[] = { - {ARMISD::VCEQ, ARMISD::VCNE}, - {ARMISD::VCEQZ, ARMISD::VCNEZ}, - {ARMISD::VCGE, ARMISD::VCLT}, - {ARMISD::VCGEZ, ARMISD::VCLTZ}, - {ARMISD::VCGT, ARMISD::VCLE}, - {ARMISD::VCGTZ, ARMISD::VCLEZ}, - }; - EVT VT = N->getValueType(0); SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); - unsigned Opposite0 = 0; - unsigned Opposite1 = 0; - for (auto Code : InvertCodes) { - if (N0->getOpcode() == Code.Opcode) - Opposite0 = Code.Opposite; - if (N0->getOpcode() == Code.Opposite) - Opposite0 = Code.Opcode; - if (N1->getOpcode() == Code.Opcode) - Opposite1 = Code.Opposite; - if (N1->getOpcode() == Code.Opposite) - Opposite1 = Code.Opcode; - } + ARMCC::CondCodes CondCode0 = ARMCC::AL; + ARMCC::CondCodes CondCode1 = ARMCC::AL; + if (N0->getOpcode() == ARMISD::VCMP) + CondCode0 = (ARMCC::CondCodes)cast(N0->getOperand(2)) + ->getZExtValue(); + if (N0->getOpcode() == ARMISD::VCMPZ) + CondCode0 = (ARMCC::CondCodes)cast(N0->getOperand(1)) + ->getZExtValue(); + if (N1->getOpcode() == ARMISD::VCMP) + CondCode1 = (ARMCC::CondCodes)cast(N1->getOperand(2)) + ->getZExtValue(); + if (N1->getOpcode() == ARMISD::VCMPZ) + CondCode1 = 
(ARMCC::CondCodes)cast(N1->getOperand(1)) + ->getZExtValue(); + + if (CondCode0 == ARMCC::AL || CondCode1 == ARMCC::AL) + return SDValue(); + + unsigned Opposite0 = ARMCC::getOppositeCondition(CondCode0); + unsigned Opposite1 = ARMCC::getOppositeCondition(CondCode1); - if (!Opposite0 || !Opposite1) + if (!isValidMVECond(Opposite0) || !isValidMVECond(Opposite1)) return SDValue(); std::vector Ops0; - for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i) - Ops0.push_back(N0->getOperand(i)); + Ops0.push_back(N0->getOperand(0)); + if (N0->getOpcode() == ARMISD::VCMP) + Ops0.push_back(N0->getOperand(1)); + Ops0.push_back(DCI.DAG.getConstant(Opposite0, SDLoc(N0), MVT::i32)); std::vector Ops1; - for (unsigned i = 0, e = N1->getNumOperands(); i != e; ++i) - Ops1.push_back(N1->getOperand(i)); + Ops1.push_back(N1->getOperand(0)); + if (N1->getOpcode() == ARMISD::VCMP) + Ops1.push_back(N1->getOperand(1)); + Ops1.push_back(DCI.DAG.getConstant(Opposite1, SDLoc(N1), MVT::i32)); - SDValue NewN0 = DCI.DAG.getNode(Opposite0, SDLoc(N0), VT, Ops0); - SDValue NewN1 = DCI.DAG.getNode(Opposite1, SDLoc(N1), VT, Ops1); + SDValue NewN0 = DCI.DAG.getNode(N0->getOpcode(), SDLoc(N0), VT, Ops0); + SDValue NewN1 = DCI.DAG.getNode(N1->getOpcode(), SDLoc(N1), VT, Ops1); SDValue And = DCI.DAG.getNode(ISD::AND, SDLoc(N), VT, NewN0, NewN1); return DCI.DAG.getNode(ISD::XOR, SDLoc(N), VT, And, DCI.DAG.getAllOnesConstant(SDLoc(N), VT)); Index: llvm/lib/Target/ARM/ARMInstrInfo.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrInfo.td +++ llvm/lib/Target/ARM/ARMInstrInfo.td @@ -268,23 +268,12 @@ def ARMWLS : SDNode<"ARMISD::WLS", SDT_ARMWhileLoop, [SDNPHasChain]>; -def SDTARMVCMP : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; -def SDTARMVCMPZ : SDTypeProfile<1, 1, []>; - -def ARMvceq : SDNode<"ARMISD::VCEQ", SDTARMVCMP>; -def ARMvceqz : SDNode<"ARMISD::VCEQZ", SDTARMVCMPZ>; -def ARMvcne : SDNode<"ARMISD::VCNE", SDTARMVCMP>; -def 
ARMvcnez : SDNode<"ARMISD::VCNEZ", SDTARMVCMPZ>; -def ARMvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>; -def ARMvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>; -def ARMvcle : SDNode<"ARMISD::VCLE", SDTARMVCMP>; -def ARMvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>; -def ARMvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>; -def ARMvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>; -def ARMvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>; -def ARMvclt : SDNode<"ARMISD::VCLT", SDTARMVCMP>; -def ARMvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>; -def ARMvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>; +def SDTARMVCMP : SDTypeProfile<1, 3, [SDTCisInt<0>, SDTCisSameAs<1, 2>, + SDTCisInt<3>]>; +def SDTARMVCMPZ : SDTypeProfile<1, 2, [SDTCisInt<2>]>; + +def ARMvcmp : SDNode<"ARMISD::VCMP", SDTARMVCMP>; +def ARMvcmpz : SDNode<"ARMISD::VCMPZ", SDTARMVCMPZ>; //===----------------------------------------------------------------------===// // ARM Flag Definitions. Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2966,118 +2966,120 @@ def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>; def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>; -multiclass unpred_vcmp_z { - def i8 : Pat<(v16i1 (opnode (v16i8 MQPR:$v1))), +multiclass unpred_vcmp_z { + def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))), (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>; - def i16 : Pat<(v8i1 (opnode (v8i16 MQPR:$v1))), + def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))), (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>; - def i32 : Pat<(v4i1 (opnode (v4i32 MQPR:$v1))), + def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))), (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>; - def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (opnode (v16i8 MQPR:$v1))))), + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 
fc))))), (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8i16 MQPR:$v1))))), + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))))), (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (opnode (v4i32 MQPR:$v1))))), + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))))), (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; } -multiclass unpred_vcmp_r { - def i8 : Pat<(v16i1 (opnode (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), +multiclass unpred_vcmp_r { + def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))), (v16i1 (!cast("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>; - def i16 : Pat<(v8i1 (opnode (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), + def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))), (v8i1 (!cast("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>; - def i32 : Pat<(v4i1 (opnode (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), + def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))), (v4i1 (!cast("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>; - def i8r : Pat<(v16i1 (opnode (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)))), + def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))), (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>; - def i16r : Pat<(v8i1 (opnode (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)))), + def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))), (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>; - def i32r : Pat<(v4i1 (opnode (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)))), + def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))), (v4i1 
(!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>; - def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (opnode (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))))), + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))))), (v16i1 (!cast("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))))), + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))))), (v8i1 (!cast("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (opnode (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))))), + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))))), (v4i1 (!cast("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (opnode (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)))))), + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))))), (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)))))), + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))))), (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (opnode (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)))))), + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))))), (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; } -multiclass unpred_vcmpf_z { - def f16 : Pat<(v8i1 (opnode (v8f16 MQPR:$v1))), +multiclass unpred_vcmpf_z { + def 
f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))), (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>; - def f32 : Pat<(v4i1 (opnode (v4f32 MQPR:$v1))), + def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))), (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8f16 MQPR:$v1))))), + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))))), (v8i1 (MVE_VCMPf32r (v8f16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (opnode (v4f32 MQPR:$v1))))), + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))))), (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; } -multiclass unpred_vcmpf_r { - def f16 : Pat<(v8i1 (opnode (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))), +multiclass unpred_vcmpf_r { + def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))), (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>; - def f32 : Pat<(v4i1 (opnode (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), + def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))), (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>; - def f16r : Pat<(v8i1 (opnode (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)))), + def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))), (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>; - def f32r : Pat<(v4i1 (opnode (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)))), + def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))), (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))))), + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))), (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>; - def : 
Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (opnode (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))))), + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))), (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (opnode (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)))))), + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))))), (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (opnode (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)))))), + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))), (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; } let Predicates = [HasMVEInt] in { - defm MVE_VCEQZ : unpred_vcmp_z; - defm MVE_VCNEZ : unpred_vcmp_z; - defm MVE_VCGEZ : unpred_vcmp_z; - defm MVE_VCLTZ : unpred_vcmp_z; - defm MVE_VCGTZ : unpred_vcmp_z; - defm MVE_VCLEZ : unpred_vcmp_z; - - defm MVE_VCEQ : unpred_vcmp_r; - defm MVE_VCNE : unpred_vcmp_r; - defm MVE_VCGE : unpred_vcmp_r; - defm MVE_VCLT : unpred_vcmp_r; - defm MVE_VCGT : unpred_vcmp_r; - defm MVE_VCLE : unpred_vcmp_r; - defm MVE_VCGTU : unpred_vcmp_r; - defm MVE_VCGEU : unpred_vcmp_r; + defm MVE_VCEQZ : unpred_vcmp_z<"i", 0>; + defm MVE_VCNEZ : unpred_vcmp_z<"i", 1>; + defm MVE_VCGEZ : unpred_vcmp_z<"s", 10>; + defm MVE_VCLTZ : unpred_vcmp_z<"s", 11>; + defm MVE_VCGTZ : unpred_vcmp_z<"s", 12>; + defm MVE_VCLEZ : unpred_vcmp_z<"s", 13>; + defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>; + defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>; + + defm MVE_VCEQ : unpred_vcmp_r<"i", 0>; + defm MVE_VCNE : unpred_vcmp_r<"i", 1>; + defm MVE_VCGE : unpred_vcmp_r<"s", 10>; + defm MVE_VCLT : unpred_vcmp_r<"s", 11>; + defm MVE_VCGT : unpred_vcmp_r<"s", 12>; + defm MVE_VCLE : 
unpred_vcmp_r<"s", 13>; + defm MVE_VCGTU : unpred_vcmp_r<"u", 8>; + defm MVE_VCGEU : unpred_vcmp_r<"u", 2>; } let Predicates = [HasMVEFloat] in { - defm MVE_VFCEQZ : unpred_vcmpf_z; - defm MVE_VFCNEZ : unpred_vcmpf_z; - defm MVE_VFCGEZ : unpred_vcmpf_z; - defm MVE_VFCLTZ : unpred_vcmpf_z; - defm MVE_VFCGTZ : unpred_vcmpf_z; - defm MVE_VFCLEZ : unpred_vcmpf_z; - - defm MVE_VFCGE : unpred_vcmpf_r; - defm MVE_VFCLT : unpred_vcmpf_r; - defm MVE_VFCGT : unpred_vcmpf_r; - defm MVE_VFCLE : unpred_vcmpf_r; - defm MVE_VFCEQ : unpred_vcmpf_r; - defm MVE_VFCNE : unpred_vcmpf_r; + defm MVE_VFCEQZ : unpred_vcmpf_z<0>; + defm MVE_VFCNEZ : unpred_vcmpf_z<1>; + defm MVE_VFCGEZ : unpred_vcmpf_z<10>; + defm MVE_VFCLTZ : unpred_vcmpf_z<11>; + defm MVE_VFCGTZ : unpred_vcmpf_z<12>; + defm MVE_VFCLEZ : unpred_vcmpf_z<13>; + + defm MVE_VFCEQ : unpred_vcmpf_r<0>; + defm MVE_VFCNE : unpred_vcmpf_r<1>; + defm MVE_VFCGE : unpred_vcmpf_r<10>; + defm MVE_VFCLT : unpred_vcmpf_r<11>; + defm MVE_VFCGT : unpred_vcmpf_r<12>; + defm MVE_VFCLE : unpred_vcmpf_r<13>; } Index: llvm/lib/Target/ARM/ARMInstrNEON.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrNEON.td +++ llvm/lib/Target/ARM/ARMInstrNEON.td @@ -478,7 +478,8 @@ // NEON-specific DAG Nodes. //===----------------------------------------------------------------------===// -def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVCMP>; +def SDTARMVTST : SDTypeProfile<1, 2, [SDTCisInt<0>, SDTCisSameAs<1, 2>]>; +def NEONvtst : SDNode<"ARMISD::VTST", SDTARMVTST>; // Types for vector shift by immediates. The "SHX" version is for long and // narrow operations where the source and destination vectors have different @@ -3313,30 +3314,30 @@ // source operand element sizes of 8, 16 and 32 bits: multiclass N2V_QHS_cmp op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, string opc, string Dt, - string asm, SDNode OpNode> { + string asm, int fc> { // 64-bit vector types. 
def v8i8 : N2V; + [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), (i32 fc))))]>; def v4i16 : N2V; + [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), (i32 fc))))]>; def v2i32 : N2V; + [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), (i32 fc))))]>; def v2f32 : N2V { + [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), (i32 fc))))]> { let Inst{10} = 1; // overwrite F = 1 } def v4f16 : N2V, + [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), (i32 fc))))]>, Requires<[HasNEON,HasFullFP16]> { let Inst{10} = 1; // overwrite F = 1 } @@ -3345,30 +3346,83 @@ def v16i8 : N2V; + [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), (i32 fc))))]>; def v8i16 : N2V; + [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), (i32 fc))))]>; def v4i32 : N2V; + [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), (i32 fc))))]>; def v4f32 : N2V { + [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), (i32 fc))))]> { let Inst{10} = 1; // overwrite F = 1 } def v8f16 : N2V, + [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), (i32 fc))))]>, Requires<[HasNEON,HasFullFP16]> { let Inst{10} = 1; // overwrite F = 1 } } +class N3VQ_cmp op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, int fc, bit Commutable> + : N3V { + // All of these have a two-operand InstAlias. + let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} +// Basic 3-register operations: double- and quad-register. +class N3VD_cmp op21_20, bits<4> op11_8, bit op4, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, int fc, bit Commutable> + : N3V { + // All of these have a two-operand InstAlias. 
+ let TwoOperandAliasConstraint = "$Vn = $Vd"; + let isCommutable = Commutable; +} + +// First with only element sizes of 8, 16 and 32 bits: +multiclass N3V_QHS_cmp op11_8, bit op4, + InstrItinClass itinD16, InstrItinClass itinD32, + InstrItinClass itinQ16, InstrItinClass itinQ32, + string OpcodeStr, string Dt, + int fc, bit Commutable = 0> { + // 64-bit vector types. + def v8i8 : N3VD_cmp; + def v4i16 : N3VD_cmp; + def v2i32 : N3VD_cmp; + + // 128-bit vector types. + def v16i8 : N3VQ_cmp; + def v8i16 : N3VQ_cmp; + def v4i32 : N3VQ_cmp; +} + // Neon 2-register vector intrinsics, // element sizes of 8, 16 and 32 bits: @@ -5013,67 +5067,67 @@ // Vector Comparisons. // VCEQ : Vector Compare Equal -defm VCEQ : N3V_QHS<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vceq", "i", ARMvceq, 1>; -def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, - ARMvceq, 1>; -def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, - ARMvceq, 1>; -def VCEQhd : N3VD<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, - ARMvceq, 1>, +defm VCEQ : N3V_QHS_cmp<1, 0, 0b1000, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vceq", "i", 0, 1>; +def VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, + 0, 1>; +def VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, + 0, 1>; +def VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, + 0, 1>, Requires<[HasNEON, HasFullFP16]>; -def VCEQhq : N3VQ<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, - ARMvceq, 1>, +def VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, + 0, 1>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", - "$Vd, $Vm, #0", ARMvceqz>; + "$Vd, $Vm, #0", 0>; // VCGE : Vector Compare Greater Than or Equal -defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, 
IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "s", ARMvcge, 0>; -defm VCGEu : N3V_QHS<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "u", ARMvcgeu, 0>; -def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, - ARMvcge, 0>; -def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, - ARMvcge, 0>; -def VCGEhd : N3VD<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, - ARMvcge, 0>, +defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "s", 10, 0>; +defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcge", "u", 2, 0>; +def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, + 10, 0>; +def VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, + 10, 0>; +def VCGEhd : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, + 10, 0>, Requires<[HasNEON, HasFullFP16]>; -def VCGEhq : N3VQ<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, - ARMvcge, 0>, +def VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, + 10, 0>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", - "$Vd, $Vm, #0", ARMvcgez>; + "$Vd, $Vm, #0", 10>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", - "$Vd, $Vm, #0", ARMvclez>; + "$Vd, $Vm, #0", 13>; } // VCGT : Vector Compare Greater Than -defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "s", ARMvcgt, 0>; -defm VCGTu : N3V_QHS<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "u", ARMvcgtu, 0>; -def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, - ARMvcgt, 0>; -def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, - ARMvcgt, 0>; 
-def VCGThd : N3VD<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, - ARMvcgt, 0>, +defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "s", 12, 0>; +defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, + IIC_VSUBi4Q, "vcgt", "u", 8, 0>; +def VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, + 12, 0>; +def VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, + 12, 0>; +def VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, + 12, 0>, Requires<[HasNEON, HasFullFP16]>; -def VCGThq : N3VQ<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, - ARMvcgt, 0>, +def VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, + 12, 0>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", - "$Vd, $Vm, #0", ARMvcgtz>; + "$Vd, $Vm, #0", 12>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", - "$Vd, $Vm, #0", ARMvcltz>; + "$Vd, $Vm, #0", 11>; } // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE) Index: llvm/test/CodeGen/Thumb2/mve-pred-and.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-pred-and.ll +++ llvm/test/CodeGen/Thumb2/mve-pred-and.ll @@ -129,9 +129,8 @@ ; CHECK-LABEL: cmpulez_v4i1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vcmp.i32 eq, q0, zr -; CHECK-NEXT: vmov.i32 q2, #0x0 ; CHECK-NEXT: vpst -; CHECK-NEXT: vcmpt.u32 cs, q2, q1 +; CHECK-NEXT: vcmpt.u32 cs, q1, zr ; CHECK-NEXT: vpsel q0, q0, q1 ; CHECK-NEXT: bx lr entry: Index: llvm/test/CodeGen/Thumb2/mve-pred-or.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-pred-or.ll +++ llvm/test/CodeGen/Thumb2/mve-pred-or.ll @@ -158,8 +158,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, 
<4 x i32> %b) { ; CHECK-LABEL: cmpulez_v4i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.i32 q2, #0x0 -; CHECK-NEXT: vcmp.u32 cs, q2, q1 +; CHECK-NEXT: vcmp.u32 cs, q1, zr ; CHECK-NEXT: vmrs r0, p0 ; CHECK-NEXT: vcmp.i32 eq, q0, zr ; CHECK-NEXT: vmrs r1, p0 Index: llvm/test/CodeGen/Thumb2/mve-pred-xor.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-pred-xor.ll +++ llvm/test/CodeGen/Thumb2/mve-pred-xor.ll @@ -151,8 +151,7 @@ define arm_aapcs_vfpcc <4 x i32> @cmpulez_v4i1(<4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: cmpulez_v4i1: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.i32 q2, #0x0 -; CHECK-NEXT: vcmp.u32 cs, q2, q1 +; CHECK-NEXT: vcmp.u32 cs, q1, zr ; CHECK-NEXT: vmrs r0, p0 ; CHECK-NEXT: vcmp.i32 eq, q0, zr ; CHECK-NEXT: vmrs r1, p0 Index: llvm/test/CodeGen/Thumb2/mve-vcmpz.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vcmpz.ll +++ llvm/test/CodeGen/Thumb2/mve-vcmpz.ll @@ -110,8 +110,7 @@ define arm_aapcs_vfpcc <4 x i32> @vcmp_ulez_v4i32(<4 x i32> %src, <4 x i32> %a, <4 x i32> %b) { ; CHECK-LABEL: vcmp_ulez_v4i32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.i32 q3, #0x0 -; CHECK-NEXT: vcmp.u32 cs, q3, q0 +; CHECK-NEXT: vcmp.u32 cs, q0, zr ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -230,8 +229,7 @@ define arm_aapcs_vfpcc <8 x i16> @vcmp_ulez_v8i16(<8 x i16> %src, <8 x i16> %a, <8 x i16> %b) { ; CHECK-LABEL: vcmp_ulez_v8i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.i32 q3, #0x0 -; CHECK-NEXT: vcmp.u16 cs, q3, q0 +; CHECK-NEXT: vcmp.u16 cs, q0, zr ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: bx lr entry: @@ -350,8 +348,7 @@ define arm_aapcs_vfpcc <16 x i8> @vcmp_ulez_v16i8(<16 x i8> %src, <16 x i8> %a, <16 x i8> %b) { ; CHECK-LABEL: vcmp_ulez_v16i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.i32 q3, #0x0 -; CHECK-NEXT: vcmp.u8 cs, q3, q0 +; CHECK-NEXT: vcmp.u8 cs, q0, zr ; CHECK-NEXT: vpsel q0, q1, q2 ; CHECK-NEXT: 
bx lr entry: