Index: llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -2723,25 +2723,6 @@
   return false;
 }
 
-/// getSwappedCondition - assume the flags are set by MI(a,b), return
-/// the condition code if we modify the instructions such that flags are
-/// set by MI(b,a).
-inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
-  switch (CC) {
-  default: return ARMCC::AL;
-  case ARMCC::EQ: return ARMCC::EQ;
-  case ARMCC::NE: return ARMCC::NE;
-  case ARMCC::HS: return ARMCC::LS;
-  case ARMCC::LO: return ARMCC::HI;
-  case ARMCC::HI: return ARMCC::LO;
-  case ARMCC::LS: return ARMCC::HS;
-  case ARMCC::GE: return ARMCC::LE;
-  case ARMCC::LT: return ARMCC::GT;
-  case ARMCC::GT: return ARMCC::LT;
-  case ARMCC::LE: return ARMCC::GE;
-  }
-}
-
 /// getCmpToAddCondition - assume the flags are set by CMP(a,b), return
 /// the condition code if we modify the instructions such that flags are
 /// set by ADD(a,b,X).
Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -9106,6 +9106,12 @@
       ST->getMemOperand());
 }
 
+static bool isZeroVector(SDValue N) {
+  return (ISD::isBuildVectorAllZeros(N.getNode()) ||
+          (N->getOpcode() == ARMISD::VMOVIMM &&
+           isNullConstant(N->getOperand(0))));
+}
+
 static SDValue LowerMLOAD(SDValue Op, SelectionDAG &DAG) {
   MaskedLoadSDNode *N = cast<MaskedLoadSDNode>(Op.getNode());
   MVT VT = Op.getSimpleValueType();
@@ -9113,13 +9119,7 @@
   SDValue PassThru = N->getPassThru();
   SDLoc dl(Op);
 
-  auto IsZero = [](SDValue PassThru) {
-    return (ISD::isBuildVectorAllZeros(PassThru.getNode()) ||
-      (PassThru->getOpcode() == ARMISD::VMOVIMM &&
-       isNullConstant(PassThru->getOperand(0))));
-  };
-
-  if (IsZero(PassThru))
+  if (isZeroVector(PassThru))
     return Op;
 
   // MVE Masked loads use zero as the passthru value. Here we convert undef to
@@ -9133,7 +9133,7 @@
   SDValue Combo = NewLoad;
   if (!PassThru.isUndef() &&
       (PassThru.getOpcode() != ISD::BITCAST ||
-       !IsZero(PassThru->getOperand(0))))
+       !isZeroVector(PassThru->getOperand(0))))
     Combo = DAG.getNode(ISD::VSELECT, dl, VT, Mask, NewLoad, PassThru);
   return DAG.getMergeValues({Combo, NewLoad.getValue(1)}, dl);
 }
@@ -13065,6 +13065,39 @@
   return SDValue();
 }
 
+static SDValue PerformVCMPCombine(SDNode *N,
+                                  TargetLowering::DAGCombinerInfo &DCI,
+                                  const ARMSubtarget *Subtarget) {
+  if (!Subtarget->hasMVEIntegerOps())
+    return SDValue();
+
+  EVT VT = N->getValueType(0);
+  SDValue Op0 = N->getOperand(0);
+  SDValue Op1 = N->getOperand(1);
+  ARMCC::CondCodes Cond =
+      (ARMCC::CondCodes)cast<ConstantSDNode>(N->getOperand(2))->getZExtValue();
+  SDLoc dl(N);
+
+  // vcmp X, 0, cc -> vcmpz X, cc
+  if (isZeroVector(Op1))
+    return DCI.DAG.getNode(ARMISD::VCMPZ, dl, VT, Op0,
+                           N->getOperand(2));
+
+  unsigned SwappedCond = getSwappedCondition(Cond);
+  if (isValidMVECond(SwappedCond, VT.isFloatingPoint())) {
+    // vcmp 0, X, cc -> vcmpz X, reversed(cc)
+    if (isZeroVector(Op0))
+      return DCI.DAG.getNode(ARMISD::VCMPZ, dl, VT, Op1,
+                             DCI.DAG.getConstant(SwappedCond, dl, MVT::i32));
+    // vcmp vdup(Y), X, cc -> vcmp X, vdup(Y), reversed(cc)
+    if (Op0->getOpcode() == ARMISD::VDUP && Op1->getOpcode() != ARMISD::VDUP)
+      return DCI.DAG.getNode(ARMISD::VCMP, dl, VT, Op1, Op0,
+                             DCI.DAG.getConstant(SwappedCond, dl, MVT::i32));
+  }
+
+  return SDValue();
+}
+
 /// PerformInsertEltCombine - Target-specific dag combine xforms for
 /// ISD::INSERT_VECTOR_ELT.
 static SDValue PerformInsertEltCombine(SDNode *N,
@@ -14786,6 +14819,8 @@
     return PerformARMBUILD_VECTORCombine(N, DCI);
   case ARMISD::PREDICATE_CAST:
     return PerformPREDICATE_CASTCombine(N, DCI);
+  case ARMISD::VCMP:
+    return PerformVCMPCombine(N, DCI, Subtarget);
   case ARMISD::SMULWB: {
     unsigned BitWidth = N->getValueType(0).getSizeInBits();
     APInt DemandedMask = APInt::getLowBitsSet(BitWidth, 16);
Index: llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
===================================================================
--- llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
+++ llvm/lib/Target/ARM/Utils/ARMBaseInfo.h
@@ -64,6 +64,25 @@
   case LE: return GT;
   }
 }
+
+/// getSwappedCondition - assume the flags are set by MI(a,b), return
+/// the condition code if we modify the instructions such that flags are
+/// set by MI(b,a).
+inline static ARMCC::CondCodes getSwappedCondition(ARMCC::CondCodes CC) {
+  switch (CC) {
+  default: return ARMCC::AL;
+  case ARMCC::EQ: return ARMCC::EQ;
+  case ARMCC::NE: return ARMCC::NE;
+  case ARMCC::HS: return ARMCC::LS;
+  case ARMCC::LO: return ARMCC::HI;
+  case ARMCC::HI: return ARMCC::LO;
+  case ARMCC::LS: return ARMCC::HS;
+  case ARMCC::GE: return ARMCC::LE;
+  case ARMCC::LT: return ARMCC::GT;
+  case ARMCC::GT: return ARMCC::LT;
+  case ARMCC::LE: return ARMCC::GE;
+  }
+}
 } // end namespace ARMCC
 
 namespace ARMVCC {
Index: llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
+++ llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
@@ -91,9 +91,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_one_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, zr
 ; CHECK-MVEFP-NEXT: vpnot
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
@@ -324,9 +323,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ueq_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, zr
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -594,9 +592,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ord_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, zr
 ; CHECK-MVEFP-NEXT: vpnot
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
@@ -641,9 +638,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_uno_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f32 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, zr
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -869,9 +865,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_one_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, zr
 ; CHECK-MVEFP-NEXT: vpnot
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
@@ -1408,9 +1403,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ueq_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, zr
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -2038,9 +2032,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ord_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, zr
 ; CHECK-MVEFP-NEXT: vpnot
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
@@ -2145,9 +2138,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_uno_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q3, q0
-; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q3
+; CHECK-MVEFP-NEXT: vpt.f16 ge, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, zr
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -2248,9 +2240,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_one_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f32 le, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, zr
 ; CHECK-MVEFP-NEXT: vpnot
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
@@ -2481,9 +2472,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ueq_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f32 le, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, zr
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -2751,9 +2741,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ord_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, zr
 ; CHECK-MVEFP-NEXT: vpnot
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
@@ -2798,9 +2787,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_uno_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f32 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f32 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, zr
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -3026,9 +3014,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_one_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f16 le, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, zr
 ; CHECK-MVEFP-NEXT: vpnot
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
@@ -3565,9 +3552,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ueq_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f16 le, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 ge, q0, zr
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -4195,9 +4181,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ord_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, zr
 ; CHECK-MVEFP-NEXT: vpnot
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
@@ -4302,9 +4287,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_uno_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vpt.f16 le, q0, q3
-; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q3, q0
+; CHECK-MVEFP-NEXT: vpt.f16 le, q0, zr
+; CHECK-MVEFP-NEXT: vcmpt.f32 gt, q0, zr
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry: