Index: llvm/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -12649,58 +12649,44 @@
 };
 }
 
+static ARMCC::CondCodes getVCMPCondCode(SDValue N) {
+  if (N->getOpcode() == ARMISD::VCMP)
+    return (ARMCC::CondCodes)N->getConstantOperandVal(2);
+  else if (N->getOpcode() == ARMISD::VCMPZ)
+    return (ARMCC::CondCodes)N->getConstantOperandVal(1);
+  else
+    llvm_unreachable("Not a VCMP/VCMPZ!");
+}
+
+static bool CanInvertMVEVCMP(SDValue N) {
+  ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N));
+  return isValidMVECond(CC, N->getOperand(0).getValueType().isFloatingPoint());
+}
+
 static SDValue PerformORCombine_i1(SDNode *N,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const ARMSubtarget *Subtarget) {
   // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
   // together with predicates
   EVT VT = N->getValueType(0);
+  SDLoc DL(N);
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
 
-  ARMCC::CondCodes CondCode0 = ARMCC::AL;
-  ARMCC::CondCodes CondCode1 = ARMCC::AL;
-  if (N0->getOpcode() == ARMISD::VCMP)
-    CondCode0 = (ARMCC::CondCodes)cast<ConstantSDNode>(N0->getOperand(2))
-                    ->getZExtValue();
-  else if (N0->getOpcode() == ARMISD::VCMPZ)
-    CondCode0 = (ARMCC::CondCodes)cast<ConstantSDNode>(N0->getOperand(1))
-                    ->getZExtValue();
-  if (N1->getOpcode() == ARMISD::VCMP)
-    CondCode1 = (ARMCC::CondCodes)cast<ConstantSDNode>(N1->getOperand(2))
-                    ->getZExtValue();
-  else if (N1->getOpcode() == ARMISD::VCMPZ)
-    CondCode1 = (ARMCC::CondCodes)cast<ConstantSDNode>(N1->getOperand(1))
-                    ->getZExtValue();
-
-  if (CondCode0 == ARMCC::AL || CondCode1 == ARMCC::AL)
-    return SDValue();
-
-  unsigned Opposite0 = ARMCC::getOppositeCondition(CondCode0);
-  unsigned Opposite1 = ARMCC::getOppositeCondition(CondCode1);
+  auto IsFreelyInvertable = [&](SDValue V) {
+    if (V->getOpcode() == ARMISD::VCMP || V->getOpcode() == ARMISD::VCMPZ)
+      return CanInvertMVEVCMP(V);
+    return false;
+  };
 
-  if (!isValidMVECond(Opposite0,
-                      N0->getOperand(0)->getValueType(0).isFloatingPoint()) ||
-      !isValidMVECond(Opposite1,
-                      N1->getOperand(0)->getValueType(0).isFloatingPoint()))
+  // At least one operand must be freely invertable.
+  if (!(IsFreelyInvertable(N0) || IsFreelyInvertable(N1)))
    return SDValue();
 
-  SmallVector<SDValue, 4> Ops0;
-  Ops0.push_back(N0->getOperand(0));
-  if (N0->getOpcode() == ARMISD::VCMP)
-    Ops0.push_back(N0->getOperand(1));
-  Ops0.push_back(DCI.DAG.getConstant(Opposite0, SDLoc(N0), MVT::i32));
-  SmallVector<SDValue, 4> Ops1;
-  Ops1.push_back(N1->getOperand(0));
-  if (N1->getOpcode() == ARMISD::VCMP)
-    Ops1.push_back(N1->getOperand(1));
-  Ops1.push_back(DCI.DAG.getConstant(Opposite1, SDLoc(N1), MVT::i32));
-
-  SDValue NewN0 = DCI.DAG.getNode(N0->getOpcode(), SDLoc(N0), VT, Ops0);
-  SDValue NewN1 = DCI.DAG.getNode(N1->getOpcode(), SDLoc(N1), VT, Ops1);
-  SDValue And = DCI.DAG.getNode(ISD::AND, SDLoc(N), VT, NewN0, NewN1);
-  return DCI.DAG.getNode(ISD::XOR, SDLoc(N), VT, And,
-                         DCI.DAG.getAllOnesConstant(SDLoc(N), VT));
+  SDValue NewN0 = DCI.DAG.getLogicalNOT({N0}, N0, VT);
+  SDValue NewN1 = DCI.DAG.getLogicalNOT({N1}, N1, VT);
+  SDValue And = DCI.DAG.getNode(ISD::AND, DL, VT, NewN0, NewN1);
+  return DCI.DAG.getLogicalNOT(DL, And, VT);
 }
 
 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
@@ -12821,6 +12807,27 @@
     return Result;
   }
 
+  if (Subtarget->hasMVEIntegerOps()) {
+    // fold (xor(vcmp/z, 1)) into a vcmp with the opposite condition.
+    SDValue N0 = N->getOperand(0);
+    SDValue N1 = N->getOperand(1);
+    const TargetLowering *TLI = Subtarget->getTargetLowering();
+    if (TLI->isConstTrueVal(N1.getNode()) &&
+        (N0->getOpcode() == ARMISD::VCMP || N0->getOpcode() == ARMISD::VCMPZ)) {
+      if (CanInvertMVEVCMP(N0)) {
+        SDLoc DL(N0);
+        ARMCC::CondCodes CC = ARMCC::getOppositeCondition(getVCMPCondCode(N0));
+
+        SmallVector<SDValue, 4> Ops;
+        Ops.push_back(N0->getOperand(0));
+        if (N0->getOpcode() == ARMISD::VCMP)
+          Ops.push_back(N0->getOperand(1));
+        Ops.push_back(DCI.DAG.getConstant(CC, DL, MVT::i32));
+        return DCI.DAG.getNode(N0->getOpcode(), DL, N0->getValueType(0), Ops);
+      }
+    }
+  }
+
   return SDValue();
 }
Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
+++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
@@ -296,9 +296,8 @@
 define dso_local i32 @or_mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c, i32* noalias nocapture readonly %d, i32 %N) {
 ; CHECK-LABEL: or_mul_reduce_add:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: push {r4, r5, r6, lr}
-; CHECK-NEXT: sub sp, #4
-; CHECK-NEXT: ldr.w r12, [sp, #20]
+; CHECK-NEXT: push {r4, r5, r7, lr}
+; CHECK-NEXT: ldr.w r12, [sp, #16]
 ; CHECK-NEXT: cmp.w r12, #0
 ; CHECK-NEXT: beq .LBB3_4
 ; CHECK-NEXT: @ %bb.1: @ %vector.ph
@@ -315,21 +314,16 @@
 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT: vctp.32 r12
 ; CHECK-NEXT: vmov q0, q1
-; CHECK-NEXT: vstr p0, [sp] @ 4-byte Spill
-; CHECK-NEXT: sub.w r12, r12, #4
 ; CHECK-NEXT: vpstt
 ; CHECK-NEXT: vldrwt.u32 q1, [r1], #16
 ; CHECK-NEXT: vldrwt.u32 q2, [r0], #16
+; CHECK-NEXT: vpnot
 ; CHECK-NEXT: vsub.i32 q1, q2, q1
-; CHECK-NEXT: vcmp.i32 eq, q1, zr
-; CHECK-NEXT: vmrs r5, p0
-; CHECK-NEXT: vldr p0, [sp] @ 4-byte Reload
-; CHECK-NEXT: vmrs r6, p0
-; CHECK-NEXT: orrs r5, r6
-; CHECK-NEXT: vmsr p0, r5
-; CHECK-NEXT: vpstt
-; CHECK-NEXT: vldrwt.u32 q1, [r3], #16
-; CHECK-NEXT: vldrwt.u32 q2, [r2], #16
+; CHECK-NEXT: sub.w r12, r12, #4
+; CHECK-NEXT: vpstee
+; CHECK-NEXT: vcmpt.i32 ne, q1, zr
+; CHECK-NEXT: vldrwe.u32 q1, [r3], #16
+; CHECK-NEXT: vldrwe.u32 q2, [r2], #16
 ; CHECK-NEXT: vmul.i32 q1, q2, q1
 ; CHECK-NEXT: vadd.i32 q1, q1, q0
 ; CHECK-NEXT: le lr, .LBB3_2
@@ -337,12 +331,10 @@
 ; CHECK-NEXT: vctp.32 r4
 ; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: vaddv.u32 r0, q0
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: pop {r4, r5, r7, pc}
 ; CHECK-NEXT: .LBB3_4:
 ; CHECK-NEXT: movs r0, #0
-; CHECK-NEXT: add sp, #4
-; CHECK-NEXT: pop {r4, r5, r6, pc}
+; CHECK-NEXT: pop {r4, r5, r7, pc}
 entry:
 %cmp8 = icmp eq i32 %N, 0
 br i1 %cmp8, label %for.cond.cleanup, label %vector.ph
Index: llvm/test/CodeGen/Thumb2/mve-pred-or.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-pred-or.ll
+++ llvm/test/CodeGen/Thumb2/mve-pred-or.ll
@@ -124,12 +124,10 @@
 ; CHECK-LABEL: cmpulez_v4i1:
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vcmp.u32 cs, q1, zr
-; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmsr p0, r0
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -247,12 +245,10 @@
 ; CHECK-LABEL: cmpult_v4i1:
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vcmp.u32 hi, q2, q1
-; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmsr p0, r0
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -266,12 +262,10 @@
 ; CHECK-LABEL: cmpugt_v4i1:
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vcmp.u32 hi, q1, q2
-; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmsr p0, r0
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -285,12 +279,10 @@
 ; CHECK-LABEL: cmpule_v4i1:
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vcmp.u32 cs, q2, q1
-; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmsr p0, r0
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -304,12 +296,10 @@
 ; CHECK-LABEL: cmpuge_v4i1:
 ; CHECK: @ %bb.0: @ %entry
 ; CHECK-NEXT: vcmp.u32 cs, q1, q2
-; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
-; CHECK-NEXT: vmsr p0, r0
-; CHECK-NEXT: vpsel q0, q0, q1
+; CHECK-NEXT: vpnot
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q0, zr
+; CHECK-NEXT: vpsel q0, q1, q0
 ; CHECK-NEXT: bx lr
 entry:
 %c1 = icmp eq <4 x i32> %a, zeroinitializer
Index: llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
+++ llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
@@ -483,8 +483,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q1, q0
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f32 lt, q1, q0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ugt <4 x float> %src, %src2
@@ -535,8 +535,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_uge_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp uge <4 x float> %src, %src2
@@ -587,8 +587,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ult_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, q1
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, q1
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ult <4 x float> %src, %src2
@@ -639,8 +639,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ule_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, q1
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, q1
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ule <4 x float> %src, %src2
@@ -1897,8 +1897,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q1, q0
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f16 lt, q1, q0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ugt <8 x half> %src, %src2
@@ -2021,8 +2021,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_uge_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q1, q0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp uge <8 x half> %src, %src2
@@ -2145,8 +2145,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ult_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, q1
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, q1
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ult <8 x half> %src, %src2
@@ -2269,8 +2269,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ule_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, q1
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, q1
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ule <8 x half> %src, %src2
Index: llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
+++ llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
@@ -508,8 +508,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -563,8 +563,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_uge_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -618,8 +618,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_ult_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -673,8 +673,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_ule_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -1945,8 +1945,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %src2 = load half, half* %src2p
@@ -2070,8 +2070,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_uge_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %src2 = load half, half* %src2p
@@ -2195,8 +2195,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_ult_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %src2 = load half, half* %src2p
@@ -2320,8 +2320,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_ule_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %src2 = load half, half* %src2p
@@ -3093,8 +3093,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_ugt_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -3148,8 +3148,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_uge_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -3203,8 +3203,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_ult_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -3258,8 +3258,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_ule_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov r0, s4
-; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q3, q2
+; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -4530,8 +4530,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_ugt_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %src2 = load half, half* %src2p
@@ -4655,8 +4655,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_uge_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %src2 = load half, half* %src2p
@@ -4780,8 +4780,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_ult_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %src2 = load half, half* %src2p
@@ -4905,8 +4905,8 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_ule_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: ldrh r0, [r0]
-; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, r0
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %src2 = load half, half* %src2p
Index: llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
+++ llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
@@ -483,8 +483,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ugt <4 x float> %src, zeroinitializer
@@ -535,8 +535,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_uge_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp uge <4 x float> %src, zeroinitializer
@@ -587,8 +587,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ult_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ult <4 x float> %src, zeroinitializer
@@ -639,8 +639,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ule_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ule <4 x float> %src, zeroinitializer
@@ -1861,8 +1861,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ugt <8 x half> %src, zeroinitializer
@@ -1981,8 +1981,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_uge_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp uge <8 x half> %src, zeroinitializer
@@ -2101,8 +2101,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ult_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ult <8 x half> %src, zeroinitializer
@@ -2221,8 +2221,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ule_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ule <8 x half> %src, zeroinitializer
@@ -2956,8 +2956,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ugt_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ugt <4 x float> zeroinitializer, %src
@@ -3008,8 +3008,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_uge_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp uge <4 x float> zeroinitializer, %src
@@ -3060,8 +3060,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ult_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 le, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ult <4 x float> zeroinitializer, %src
@@ -3112,8 +3112,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ule_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 lt, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ule <4 x float> zeroinitializer, %src
@@ -4334,8 +4334,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ugt_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ugt <8 x half> zeroinitializer, %src
@@ -4454,8 +4454,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_uge_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp uge <8 x half> zeroinitializer, %src
@@ -4574,8 +4574,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ult_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 le, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ult <8 x half> zeroinitializer, %src
@@ -4694,8 +4694,8 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_r_ule_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 lt, q0, zr
-; CHECK-MVEFP-NEXT: vpsel q0, q2, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, zr
+; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
 %c = fcmp ule <8 x half> zeroinitializer, %src