diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp
--- a/llvm/lib/Target/ARM/ARMISelLowering.cpp
+++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp
@@ -1460,6 +1460,9 @@
   setTargetDAGCombine(ISD::OR);
   setTargetDAGCombine(ISD::XOR);
 
+  if (Subtarget->hasMVEIntegerOps())
+    setTargetDAGCombine(ISD::VSELECT);
+
   if (Subtarget->hasV6Ops())
     setTargetDAGCombine(ISD::SRL);
   if (Subtarget->isThumb1Only())
@@ -11719,6 +11722,42 @@
   return SDValue();
 }
 
+static SDValue PerformVSELECTCombine(SDNode *N,
+                                     TargetLowering::DAGCombinerInfo &DCI,
+                                     const ARMSubtarget *Subtarget) {
+  // Transforms vselect(not(cond), lhs, rhs) into vselect(cond, rhs, lhs).
+  //
+  // We need to re-implement this optimization here as the implementation in the
+  // Target-Independent DAGCombiner does not handle the kind of constant we make
+  // (it calls isConstOrConstSplat with AllowTruncation set to false - and for
+  // good reason, allowing truncation there would break other targets).
+  //
+  // Currently, this is only done for MVE, as it's the only target that benefits
+  // from this transformation (e.g. VPNOT+VPSEL becomes a single VPSEL).
+  if (!Subtarget->hasMVEIntegerOps())
+    return SDValue();
+
+  if (N->getOperand(0).getOpcode() != ISD::XOR)
+    return SDValue();
+  SDValue XOR = N->getOperand(0);
+
+  // Check if the XOR's RHS is either a 1, or a BUILD_VECTOR of 1s.
+  // It is important to check with truncation allowed as the BUILD_VECTORs we
+  // generate in those situations will truncate their operands.
+  ConstantSDNode *Const =
+      isConstOrConstSplat(XOR->getOperand(1), /*AllowUndefs*/ false,
+                          /*AllowTruncation*/ true);
+  if (!Const || !Const->isOne())
+    return SDValue();
+
+  // Rewrite into vselect(cond, rhs, lhs).
+  SDValue Cond = XOR->getOperand(0);
+  SDValue LHS = N->getOperand(1);
+  SDValue RHS = N->getOperand(2);
+  EVT Type = N->getValueType(0);
+  return DCI.DAG.getNode(ISD::VSELECT, SDLoc(N), Type, Cond, RHS, LHS);
+}
+
 static SDValue PerformABSCombine(SDNode *N,
                                  TargetLowering::DAGCombinerInfo &DCI,
                                  const ARMSubtarget *Subtarget) {
@@ -15225,6 +15264,7 @@
                                              DAGCombinerInfo &DCI) const {
   switch (N->getOpcode()) {
   default: break;
+  case ISD::VSELECT:    return PerformVSELECTCombine(N, DCI, Subtarget);
   case ISD::ABS:        return PerformABSCombine(N, DCI, Subtarget);
   case ARMISD::ADDE:    return PerformADDECombine(N, DCI, Subtarget);
   case ARMISD::UMLAL:   return PerformUMLALCombine(N, DCI.DAG, Subtarget);
diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll
--- a/llvm/test/CodeGen/Thumb2/mve-pred-or.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-or.ll
@@ -6,8 +6,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.i32 ne, q1, zr
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -22,8 +21,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.i32 eq, q1, zr
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -38,8 +36,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.s32 ge, q1, zr
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -54,8 +51,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.s32 le, q1, zr
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -70,8 +66,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.s32 gt, q1, zr
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -86,8 +81,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.s32 lt, q1, zr
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -116,8 +110,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.i32 eq, q1, zr
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -165,8 +158,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.i32 ne, q1, q2
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -181,8 +173,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.i32 eq, q1, q2
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -197,8 +188,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.s32 le, q2, q1
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -213,8 +203,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.s32 le, q1, q2
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -229,8 +218,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.s32 lt, q2, q1
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -245,8 +233,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i32 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.s32 lt, q1, q2
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <4 x i32> %a, zeroinitializer
@@ -340,8 +327,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i16 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.i16 ne, q1, zr
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <8 x i16> %a, zeroinitializer
@@ -356,8 +342,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i16 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.i16 ne, q1, q2
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <8 x i16> %a, zeroinitializer
@@ -373,8 +358,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i8 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.i8 ne, q1, zr
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <16 x i8> %a, zeroinitializer
@@ -389,8 +373,7 @@
 ; CHECK:       @ %bb.0: @ %entry
 ; CHECK-NEXT:    vpt.i8 ne, q0, zr
 ; CHECK-NEXT:    vcmpt.i8 ne, q1, q2
-; CHECK-NEXT:    vpnot
-; CHECK-NEXT:    vpsel q0, q0, q1
+; CHECK-NEXT:    vpsel q0, q1, q0
 ; CHECK-NEXT:    bx lr
 entry:
   %c1 = icmp eq <16 x i8> %a, zeroinitializer
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpf.ll
@@ -109,8 +109,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vpt.f32 le, q1, q0
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 le, q0, q1
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp one <4 x float> %src, %src2
@@ -485,8 +484,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f32 ge, q1, q0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ugt <4 x float> %src, %src2
@@ -538,8 +536,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_uge_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f32 gt, q1, q0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp uge <4 x float> %src, %src2
@@ -591,8 +588,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ult_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f32 ge, q0, q1
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ult <4 x float> %src, %src2
@@ -644,8 +640,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ule_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f32 gt, q0, q1
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ule <4 x float> %src, %src2
@@ -698,8 +693,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vpt.f32 le, q1, q0
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 lt, q0, q1
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ord <4 x float> %src, %src2
@@ -1019,8 +1013,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vpt.f16 le, q1, q0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 le, q0, q1
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp one <8 x half> %src, %src2
@@ -1905,8 +1898,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f16 ge, q1, q0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ugt <8 x half> %src, %src2
@@ -2030,8 +2022,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_uge_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f16 gt, q1, q0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp uge <8 x half> %src, %src2
@@ -2155,8 +2146,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ult_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f16 ge, q0, q1
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ult <8 x half> %src, %src2
@@ -2280,8 +2270,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ule_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f16 gt, q0, q1
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ule <8 x half> %src, %src2
@@ -2406,8 +2395,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vpt.f16 le, q1, q0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 lt, q0, q1
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ord <8 x half> %src, %src2
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
@@ -113,8 +113,7 @@
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
 ; CHECK-MVEFP-NEXT:    vpt.f32 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 le, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -510,8 +509,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f32 le, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -566,8 +564,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f32 lt, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -622,8 +619,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f32 ge, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -678,8 +674,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f32 gt, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -735,8 +730,7 @@
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
 ; CHECK-MVEFP-NEXT:    vpt.f32 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 lt, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -1060,8 +1054,7 @@
 ; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT:    vpt.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 le, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %src2 = load half, half* %src2p
@@ -1953,8 +1946,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT:    vcmp.f16 le, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %src2 = load half, half* %src2p
@@ -2079,8 +2071,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT:    vcmp.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %src2 = load half, half* %src2p
@@ -2205,8 +2196,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT:    vcmp.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %src2 = load half, half* %src2p
@@ -2331,8 +2321,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT:    vcmp.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %src2 = load half, half* %src2p
@@ -2458,8 +2447,7 @@
 ; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT:    vpt.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %src2 = load half, half* %src2p
@@ -2710,8 +2698,7 @@
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
 ; CHECK-MVEFP-NEXT:    vpt.f32 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 ge, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -3107,8 +3094,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f32 ge, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -3163,8 +3149,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f32 gt, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -3219,8 +3204,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f32 le, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -3275,8 +3259,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
 ; CHECK-MVEFP-NEXT:    vcmp.f32 lt, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -3332,8 +3315,7 @@
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
 ; CHECK-MVEFP-NEXT:    vpt.f32 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 gt, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
+; CHECK-MVEFP-NEXT:    vpsel q0, q3, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %i = insertelement <4 x float> undef, float %src2, i32 0
@@ -3657,8 +3639,7 @@
 ; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT:    vpt.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %src2 = load half, half* %src2p
@@ -4550,8 +4531,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT:    vcmp.f16 ge, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %src2 = load half, half* %src2p
@@ -4676,8 +4656,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT:    vcmp.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %src2 = load half, half* %src2p
@@ -4802,8 +4781,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT:    vcmp.f16 le, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %src2 = load half, half* %src2p
@@ -4928,8 +4906,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT:    vcmp.f16 lt, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %src2 = load half, half* %src2p
@@ -5055,8 +5032,7 @@
 ; CHECK-MVEFP-NEXT:    ldrh r0, [r0]
 ; CHECK-MVEFP-NEXT:    vpt.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 gt, q0, r0
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %src2 = load half, half* %src2p
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfz.ll
@@ -109,8 +109,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vpt.f32 ge, q0, zr
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 le, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp one <4 x float> %src, zeroinitializer
@@ -485,8 +484,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f32 le, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ugt <4 x float> %src, zeroinitializer
@@ -538,8 +536,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_uge_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f32 lt, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp uge <4 x float> %src, zeroinitializer
@@ -591,8 +588,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ult_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f32 ge, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ult <4 x float> %src, zeroinitializer
@@ -644,8 +640,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ule_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f32 gt, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ule <4 x float> %src, zeroinitializer
@@ -698,8 +693,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vpt.f32 ge, q0, zr
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 lt, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ord <4 x float> %src, zeroinitializer
@@ -1011,8 +1005,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vpt.f16 ge, q0, zr
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 le, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp one <8 x half> %src, zeroinitializer
@@ -1869,8 +1862,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f16 le, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ugt <8 x half> %src, zeroinitializer
@@ -1990,8 +1982,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_uge_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f16 lt, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp uge <8 x half> %src, zeroinitializer
@@ -2111,8 +2102,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ult_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f16 ge, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ult <8 x half> %src, zeroinitializer
@@ -2232,8 +2222,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ule_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f16 gt, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ule <8 x half> %src, zeroinitializer
@@ -2354,8 +2343,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vpt.f16 ge, q0, zr
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 lt, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ord <8 x half> %src, zeroinitializer
@@ -2594,8 +2582,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vpt.f32 le, q0, zr
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 ge, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp one <4 x float> zeroinitializer, %src
@@ -2970,8 +2957,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_ugt_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f32 ge, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ugt <4 x float> zeroinitializer, %src
@@ -3023,8 +3009,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_uge_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f32 gt, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp uge <4 x float> zeroinitializer, %src
@@ -3076,8 +3061,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_ult_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f32 le, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ult <4 x float> zeroinitializer, %src
@@ -3129,8 +3113,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_ule_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f32 lt, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ule <4 x float> zeroinitializer, %src
@@ -3183,8 +3166,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vpt.f32 le, q0, zr
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 gt, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ord <4 x float> zeroinitializer, %src
@@ -3496,8 +3478,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vpt.f16 le, q0, zr
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 ge, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp one <8 x half> zeroinitializer, %src
@@ -4354,8 +4335,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_ugt_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f16 ge, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ugt <8 x half> zeroinitializer, %src
@@ -4475,8 +4455,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_uge_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f16 gt, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp uge <8 x half> zeroinitializer, %src
@@ -4596,8 +4575,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_ult_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f16 le, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ult <8 x half> zeroinitializer, %src
@@ -4717,8 +4695,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_r_ule_v8f16:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vcmp.f16 lt, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ule <8 x half> zeroinitializer, %src
@@ -4839,8 +4816,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vpt.f16 le, q0, zr
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 gt, q0, zr
-; CHECK-MVEFP-NEXT:    vpnot
-; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
+; CHECK-MVEFP-NEXT:    vpsel q0, q2, q1
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
   %c = fcmp ord <8 x half> zeroinitializer, %src
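
Note: the pattern the new combine targets can be seen in reduced form below. This is an illustrative sketch in the style of the existing MVE tests, not part of the patch itself; the function name vselect_not and the RUN invocation are made up for the example. At the IR level, a vector "not" of an i1 condition is an xor with a splat of true (which legalizes into the truncating BUILD_VECTOR of 1s mentioned in the code comment), and the combine folds it away by swapping the two select operands, so no VPNOT is emitted.

; Illustrative reduced example (hypothetical, not part of this patch).
; Compiled with something like:
;   llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs
; the select below would previously lower to VPNOT+VPSEL and should now lower
; to a single VPSEL with its last two operands swapped.
define arm_aapcs_vfpcc <4 x i32> @vselect_not(<4 x i32> %a, <4 x i32> %b) {
entry:
  %c = icmp eq <4 x i32> %a, zeroinitializer
  ; Vector "not" of the condition: xor with a splat of i1 true.
  %not = xor <4 x i1> %c, <i1 true, i1 true, i1 true, i1 true>
  ; vselect(not(%c), %a, %b) is rewritten to vselect(%c, %b, %a).
  %s = select <4 x i1> %not, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %s
}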