Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.h
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.h
@@ -137,10 +137,12 @@
     VCNEZ,  // Vector compare not equal to zero (MVE)
     VCGE,   // Vector compare greater than or equal.
     VCGEZ,  // Vector compare greater than or equal to zero.
+    VCLE,   // Vector compare less than or equal.
     VCLEZ,  // Vector compare less than or equal to zero.
     VCGEU,  // Vector compare unsigned greater than or equal.
     VCGT,   // Vector compare greater than.
     VCGTZ,  // Vector compare greater than zero.
+    VCLT,   // Vector compare less than.
     VCLTZ,  // Vector compare less than zero.
     VCGTU,  // Vector compare unsigned greater than.
     VTST,   // Vector test bits.
Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
+++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp
@@ -1521,10 +1521,12 @@
   case ARMISD::VCNEZ: return "ARMISD::VCNEZ";
   case ARMISD::VCGE: return "ARMISD::VCGE";
   case ARMISD::VCGEZ: return "ARMISD::VCGEZ";
+  case ARMISD::VCLE: return "ARMISD::VCLE";
   case ARMISD::VCLEZ: return "ARMISD::VCLEZ";
   case ARMISD::VCGEU: return "ARMISD::VCGEU";
   case ARMISD::VCGT: return "ARMISD::VCGT";
   case ARMISD::VCGTZ: return "ARMISD::VCGTZ";
+  case ARMISD::VCLT: return "ARMISD::VCLT";
   case ARMISD::VCLTZ: return "ARMISD::VCLTZ";
   case ARMISD::VCGTU: return "ARMISD::VCGTU";
   case ARMISD::VTST: return "ARMISD::VTST";
@@ -11820,6 +11822,57 @@
   return SDValue();
 }
 
+static SDValue PerformORCombine_i1(SDNode *N,
+                                   TargetLowering::DAGCombinerInfo &DCI,
+                                   const ARMSubtarget *Subtarget) {
+  // Try to invert "or A, B" -> "and ~A, ~B", as the "and" is easier to chain
+  // together with predicates
+  struct Codes {
+    unsigned Opcode;
+    unsigned Opposite;
+  } InvertCodes[] = {
+      {ARMISD::VCEQ, ARMISD::VCNE},
+      {ARMISD::VCEQZ, ARMISD::VCNEZ},
+      {ARMISD::VCGE, ARMISD::VCLT},
+      {ARMISD::VCGEZ, ARMISD::VCLTZ},
+      {ARMISD::VCGT, ARMISD::VCLE},
+      {ARMISD::VCGTZ, ARMISD::VCLEZ},
+  };
+
+  EVT VT = N->getValueType(0);
+  SDValue N0 = N->getOperand(0);
+  SDValue N1 = N->getOperand(1);
+
+  unsigned Opposite0 = 0;
+  unsigned Opposite1 = 0;
+  for (auto Code : InvertCodes) {
+    if (N0->getOpcode() == Code.Opcode)
+      Opposite0 = Code.Opposite;
+    if (N0->getOpcode() == Code.Opposite)
+      Opposite0 = Code.Opcode;
+    if (N1->getOpcode() == Code.Opcode)
+      Opposite1 = Code.Opposite;
+    if (N1->getOpcode() == Code.Opposite)
+      Opposite1 = Code.Opcode;
+  }
+
+  if (!Opposite0 || !Opposite1)
+    return SDValue();
+
+  SmallVector<SDValue, 4> Ops0;
+  for (unsigned i = 0, e = N0->getNumOperands(); i != e; ++i)
+    Ops0.push_back(N0->getOperand(i));
+  SmallVector<SDValue, 4> Ops1;
+  for (unsigned i = 0, e = N1->getNumOperands(); i != e; ++i)
+    Ops1.push_back(N1->getOperand(i));
+
+  SDValue NewN0 = DCI.DAG.getNode(Opposite0, SDLoc(N0), VT, Ops0);
+  SDValue NewN1 = DCI.DAG.getNode(Opposite1, SDLoc(N1), VT, Ops1);
+  SDValue And = DCI.DAG.getNode(ISD::AND, SDLoc(N), VT, NewN0, NewN1);
+  return DCI.DAG.getNode(ISD::XOR, SDLoc(N), VT, And,
+                         DCI.DAG.getAllOnesConstant(SDLoc(N), VT));
+}
+
 /// PerformORCombine - Target-specific dag combine xforms for ISD::OR
 static SDValue PerformORCombine(SDNode *N,
                                 TargetLowering::DAGCombinerInfo &DCI,
@@ -11904,6 +11957,10 @@
     }
   }
 
+  if (Subtarget->hasMVEIntegerOps() &&
+      (VT == MVT::v4i1 || VT == MVT::v8i1 || VT == MVT::v16i1))
+    return PerformORCombine_i1(N, DCI, Subtarget);
+
   // Try to use the ARM/Thumb2 BFI (bitfield insert) instruction when
   // reasonable.
   if (N0.getOpcode() == ISD::AND && N0.hasOneUse()) {
Index: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
@@ -274,10 +274,12 @@
 def ARMvcnez : SDNode<"ARMISD::VCNEZ", SDTARMVCMPZ>;
 def ARMvcge : SDNode<"ARMISD::VCGE", SDTARMVCMP>;
 def ARMvcgez : SDNode<"ARMISD::VCGEZ", SDTARMVCMPZ>;
+def ARMvcle : SDNode<"ARMISD::VCLE", SDTARMVCMP>;
 def ARMvclez : SDNode<"ARMISD::VCLEZ", SDTARMVCMPZ>;
 def ARMvcgeu : SDNode<"ARMISD::VCGEU", SDTARMVCMP>;
 def ARMvcgt : SDNode<"ARMISD::VCGT", SDTARMVCMP>;
 def ARMvcgtz : SDNode<"ARMISD::VCGTZ", SDTARMVCMPZ>;
+def ARMvclt : SDNode<"ARMISD::VCLT", SDTARMVCMP>;
 def ARMvcltz : SDNode<"ARMISD::VCLTZ", SDTARMVCMPZ>;
 def ARMvcgtu : SDNode<"ARMISD::VCGTU", SDTARMVCMP>;
Index: llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/trunk/lib/Target/ARM/ARMInstrMVE.td
@@ -3041,15 +3041,17 @@
 let Predicates = [HasMVEInt] in {
   defm MVE_VCEQZ : unpred_vcmp_z;
   defm MVE_VCNEZ : unpred_vcmp_z;
-  defm MVE_VCLEZ : unpred_vcmp_z;
-  defm MVE_VCGTZ : unpred_vcmp_z;
-  defm MVE_VCLTZ : unpred_vcmp_z;
   defm MVE_VCGEZ : unpred_vcmp_z;
+  defm MVE_VCLTZ : unpred_vcmp_z;
+  defm MVE_VCGTZ : unpred_vcmp_z;
+  defm MVE_VCLEZ : unpred_vcmp_z;
 
   defm MVE_VCEQ : unpred_vcmp_r;
   defm MVE_VCNE : unpred_vcmp_r;
-  defm MVE_VCGT : unpred_vcmp_r;
   defm MVE_VCGE : unpred_vcmp_r;
+  defm MVE_VCLT : unpred_vcmp_r;
+  defm MVE_VCGT : unpred_vcmp_r;
+  defm MVE_VCLE : unpred_vcmp_r;
   defm MVE_VCGTU : unpred_vcmp_r;
   defm MVE_VCGEU : unpred_vcmp_r;
 }
@@ -3057,13 +3059,15 @@
 let Predicates = [HasMVEFloat] in {
   defm MVE_VFCEQZ : unpred_vcmpf_z;
   defm MVE_VFCNEZ : unpred_vcmpf_z;
-  defm MVE_VFCLEZ : unpred_vcmpf_z;
-  defm MVE_VFCGTZ : unpred_vcmpf_z;
-  defm MVE_VFCLTZ : unpred_vcmpf_z;
   defm MVE_VFCGEZ : unpred_vcmpf_z;
+  defm MVE_VFCLTZ : unpred_vcmpf_z;
+  defm MVE_VFCGTZ : unpred_vcmpf_z;
+  defm MVE_VFCLEZ : unpred_vcmpf_z;
 
-  defm MVE_VFCGT : unpred_vcmpf_r;
   defm MVE_VFCGE : unpred_vcmpf_r;
+  defm MVE_VFCLT : unpred_vcmpf_r;
+  defm MVE_VFCGT : unpred_vcmpf_r;
+  defm MVE_VFCLE : unpred_vcmpf_r;
   defm MVE_VFCEQ : unpred_vcmpf_r;
   defm MVE_VFCNE : unpred_vcmpf_r;
 }
Index: llvm/trunk/test/CodeGen/Thumb2/mve-pred-or.ll
===================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-pred-or.ll
+++ llvm/trunk/test/CodeGen/Thumb2/mve-pred-or.ll
@@ -4,11 +4,12 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpeqz_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: cmpeqz_v4i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i32 eq, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q1, zr
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -23,11 +24,12 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpnez_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: cmpnez_v4i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i32 ne, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 eq, q1, zr
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -42,11 +44,12 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpsltz_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: cmpsltz_v4i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 lt, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 ge, q1, zr
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -61,11 +64,12 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpsgtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: cmpsgtz_v4i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 gt, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 le, q1, zr
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -80,11 +84,12 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpslez_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: cmpslez_v4i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 le, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 gt, q1, zr
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -99,11 +104,12 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpsgez_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: cmpsgez_v4i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 ge, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 lt, q1, zr
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -132,11 +138,12 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpugtz_v4i1(<4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: cmpugtz_v4i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i32 ne, q1, zr
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 eq, q1, zr
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -185,11 +192,12 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpeq_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: cmpeq_v4i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i32 eq, q1, q2
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 ne, q1, q2
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -204,11 +212,12 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpne_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: cmpne_v4i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i32 ne, q1, q2
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i32 eq, q1, q2
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -223,11 +232,12 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpslt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: cmpslt_v4i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 gt, q2, q1
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 le, q2, q1
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -242,11 +252,12 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpsgt_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: cmpsgt_v4i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 gt, q1, q2
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 le, q1, q2
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -261,11 +272,12 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpsle_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: cmpsle_v4i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 ge, q2, q1
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 lt, q2, q1
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -280,11 +292,12 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpsge_v4i1(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) {
 ; CHECK-LABEL: cmpsge_v4i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.s32 ge, q1, q2
+; CHECK-NEXT: vcmp.i32 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.s32 lt, q1, q2
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i32 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -378,11 +391,12 @@
 define arm_aapcs_vfpcc <8 x i16> @cmpeqz_v8i1(<8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: cmpeqz_v8i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i16 eq, q1, zr
+; CHECK-NEXT: vcmp.i16 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i16 ne, q1, zr
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i16 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -397,11 +411,12 @@
 define arm_aapcs_vfpcc <8 x i16> @cmpeq_v8i1(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) {
 ; CHECK-LABEL: cmpeq_v8i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i16 eq, q1, q2
+; CHECK-NEXT: vcmp.i16 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i16 ne, q1, q2
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i16 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -417,11 +432,12 @@
 define arm_aapcs_vfpcc <16 x i8> @cmpeqz_v16i1(<16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: cmpeqz_v16i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i8 eq, q1, zr
+; CHECK-NEXT: vcmp.i8 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i8 ne, q1, zr
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i8 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
@@ -436,11 +452,12 @@
 define arm_aapcs_vfpcc <16 x i8> @cmpeq_v16i1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) {
 ; CHECK-LABEL: cmpeq_v16i1:
 ; CHECK: @ %bb.0: @ %entry
-; CHECK-NEXT: vcmp.i8 eq, q1, q2
+; CHECK-NEXT: vcmp.i8 ne, q0, zr
+; CHECK-NEXT: movw r1, #65535
+; CHECK-NEXT: vpst
+; CHECK-NEXT: vcmpt.i8 ne, q1, q2
 ; CHECK-NEXT: vmrs r0, p0
-; CHECK-NEXT: vcmp.i8 eq, q0, zr
-; CHECK-NEXT: vmrs r1, p0
-; CHECK-NEXT: orrs r0, r1
+; CHECK-NEXT: eors r0, r1
 ; CHECK-NEXT: vmsr p0, r0
 ; CHECK-NEXT: vpsel q0, q0, q1
 ; CHECK-NEXT: bx lr
Index: llvm/trunk/test/CodeGen/Thumb2/mve-vcmpf.ll
===================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-vcmpf.ll
+++ llvm/trunk/test/CodeGen/Thumb2/mve-vcmpf.ll
@@ -115,11 +115,12 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_one_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, q1
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q1
 ; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
 ; CHECK-MVEFP-NEXT: vmsr p0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
@@ -410,14 +411,9 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ueq_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, q1
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q1
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -769,11 +765,12 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ord_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, q1
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q1
 ; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
 ; CHECK-MVEFP-NEXT: vmsr p0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
@@ -830,14 +827,9 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_uno_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, q1
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q1, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q1
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -1215,11 +1207,12 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_one_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q1, q0
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q1
 ; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
 ; CHECK-MVEFP-NEXT: vmsr p0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
@@ -2136,14 +2129,9 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ueq_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, q1
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q1, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q1
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -3239,11 +3227,12 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_ord_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, q1
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q1, q0
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q1
 ; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
 ; CHECK-MVEFP-NEXT: vmsr p0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
@@ -3424,14 +3413,9 @@
 ;
 ; CHECK-MVEFP-LABEL: vcmp_uno_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, q1
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q1, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q1, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q1
 ; CHECK-MVEFP-NEXT: vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
Index: llvm/trunk/test/CodeGen/Thumb2/mve-vcmpfz.ll
===================================================================
--- llvm/trunk/test/CodeGen/Thumb2/mve-vcmpfz.ll
+++ llvm/trunk/test/CodeGen/Thumb2/mve-vcmpfz.ll
@@ -116,11 +116,12 @@
 ; CHECK-MVEFP-LABEL: vcmp_one_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, q3
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q3
 ; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
 ; CHECK-MVEFP-NEXT: vmsr p0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
@@ -412,14 +413,9 @@
 ; CHECK-MVEFP-LABEL: vcmp_ueq_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q0, q3
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 le, q0, q3
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -772,11 +768,12 @@
 ; CHECK-MVEFP-LABEL: vcmp_ord_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, q3
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q3
 ; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
 ; CHECK-MVEFP-NEXT: vmsr p0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
@@ -834,14 +831,9 @@
 ; CHECK-MVEFP-LABEL: vcmp_uno_v4f32:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f32 ge, q0, q3
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f32 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f32 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f32 lt, q0, q3
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -1188,11 +1180,12 @@
 ; CHECK-MVEFP-LABEL: vcmp_one_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, q3
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q3
 ; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
 ; CHECK-MVEFP-NEXT: vmsr p0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
@@ -2030,14 +2023,9 @@
 ; CHECK-MVEFP-LABEL: vcmp_ueq_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q0, q3
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 le, q0, q3
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
@@ -3038,11 +3026,12 @@
 ; CHECK-MVEFP-LABEL: vcmp_ord_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, q3
+; CHECK-MVEFP-NEXT: movw r1, #65535
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q3
 ; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
+; CHECK-MVEFP-NEXT: eors r0, r1
 ; CHECK-MVEFP-NEXT: vmsr p0, r0
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
@@ -3208,14 +3197,9 @@
 ; CHECK-MVEFP-LABEL: vcmp_uno_v8f16:
 ; CHECK-MVEFP: @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT: vmov.i32 q3, #0x0
-; CHECK-MVEFP-NEXT: vcmp.f16 ge, q0, q3
-; CHECK-MVEFP-NEXT: vmrs r0, p0
-; CHECK-MVEFP-NEXT: vcmp.f16 gt, q3, q0
-; CHECK-MVEFP-NEXT: vmrs r1, p0
-; CHECK-MVEFP-NEXT: orrs r0, r1
-; CHECK-MVEFP-NEXT: movw r1, #65535
-; CHECK-MVEFP-NEXT: eors r0, r1
-; CHECK-MVEFP-NEXT: vmsr p0, r0
+; CHECK-MVEFP-NEXT: vcmp.f16 le, q3, q0
+; CHECK-MVEFP-NEXT: vpst
+; CHECK-MVEFP-NEXT: vcmpt.f16 lt, q0, q3
 ; CHECK-MVEFP-NEXT: vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT: bx lr
 entry:
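
Note (not part of the patch): PerformORCombine_i1 rewrites "or(A, B)" of two vector comparisons as "xor(and(invert(A), invert(B)), all-ones)", which is why the updated CHECK lines above use a chained VPT block followed by "eors" against #65535 instead of "orrs". Below is a minimal standalone C++ sketch of that De Morgan identity at the level of a 16-bit MVE predicate mask; the mask values are arbitrary illustrative inputs, not taken from the tests.

// Illustrative only: mirrors the mask-level effect of the new combine.
#include <cassert>
#include <cstdint>

int main() {
  // Two arbitrary predicate masks (one bit per predicated lane, 16 bits total).
  uint16_t A = 0x00f3;
  uint16_t B = 0x0f0f;

  uint16_t OrOfMasks = A | B;                               // original or(A, B)
  uint16_t AndOfInverted = static_cast<uint16_t>(~A & ~B);  // chained inverted compares (the VPT block)
  uint16_t Rewritten = AndOfInverted ^ 0xffffu;             // final "eors r0, r1" with r1 = #65535

  assert(OrOfMasks == Rewritten);  // De Morgan: A | B == ~(~A & ~B)
  return 0;
}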