diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3299,6 +3299,31 @@
             (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>;
 }

+multiclass unpred_vcmp_r_reversible<string suffix, int fc, int fcReversed> {
+  defm "": unpred_vcmp_r<suffix, fc>;
+
+  // Additional patterns that match the vector/scalar comparisons the
+  // opposite way round, with the ARMvdup in the first operand of the
+  // ARMvcmp. These will usually need a different condition code
+  // (except for the symmetric conditions EQ and NE). They're in a
+  // separate multiclass because the unsigned CS and HI comparisons
+  // don't have reversed forms.
+
+  def : Pat<(v16i1 (ARMvcmp (v16i8 (ARMvdup GPR:$v1)), (v16i8 MQPR:$v2), (i32 fc))),
+            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v2), (i32 GPR:$v1), fcReversed))>;
+  def : Pat<(v8i1 (ARMvcmp (v8i16 (ARMvdup GPR:$v1)), (v8i16 MQPR:$v2), (i32 fc))),
+            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v2), (i32 GPR:$v1), fcReversed))>;
+  def : Pat<(v4i1 (ARMvcmp (v4i32 (ARMvdup GPR:$v1)), (v4i32 MQPR:$v2), (i32 fc))),
+            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v2), (i32 GPR:$v1), fcReversed))>;
+
+  def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 (ARMvdup GPR:$v1)), (v16i8 MQPR:$v2), (i32 fc))))),
+            (v16i1 (!cast<Instruction>("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>;
+  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 (ARMvdup GPR:$v1)), (v8i16 MQPR:$v2), (i32 fc))))),
+            (v8i1 (!cast<Instruction>("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>;
+  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 (ARMvdup GPR:$v1)), (v4i32 MQPR:$v2), (i32 fc))))),
+            (v4i1 (!cast<Instruction>("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>;
+}
+
 multiclass unpred_vcmpf_z<int fc> {
   def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))),
                 (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>;
@@ -3311,7 +3336,7 @@
                 (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>;
 }

-multiclass unpred_vcmpf_r<int fc> {
+multiclass unpred_vcmpf_r<int fc, int fcReversed> {
   def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))),
                 (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>;
   def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))),
@@ -3322,6 +3347,11 @@
   def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))),
                  (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>;
+  def : Pat<(v8i1 (ARMvcmp (v8f16 (ARMvdup HPR:$v1)), (v8f16 MQPR:$v2), (i32 fc))),
+            (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f16 HPR:$v1), rGPR)), fcReversed))>;
+  def : Pat<(v4i1 (ARMvcmp (v4f32 (ARMvdup SPR:$v1)), (v4f32 MQPR:$v2), (i32 fc))),
+            (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f32 SPR:$v1), rGPR)), fcReversed))>;
+
   def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))),
             (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>;
   def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))),
             (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>;
@@ -3331,6 +3361,11 @@
             (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
   def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))),
             (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>;
+
+  def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 (ARMvdup HPR:$v1)), (v8f16 MQPR:$v2), (i32 fc))))),
+            (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f16 HPR:$v1), rGPR)), fcReversed, 1, VCCR:$p1))>;
+  def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 (ARMvdup SPR:$v1)), (v4f32 MQPR:$v2), (i32 fc))))),
+            (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f32 SPR:$v1), rGPR)), fcReversed, 1, VCCR:$p1))>;
 }

 let Predicates = [HasMVEInt] in {
@@ -3343,12 +3378,12 @@
   defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>;
   defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>;

-  defm MVE_VCEQ : unpred_vcmp_r<"i", 0>;
-  defm MVE_VCNE : unpred_vcmp_r<"i", 1>;
-  defm MVE_VCGE : unpred_vcmp_r<"s", 10>;
-  defm MVE_VCLT : unpred_vcmp_r<"s", 11>;
-  defm MVE_VCGT : unpred_vcmp_r<"s", 12>;
-  defm MVE_VCLE : unpred_vcmp_r<"s", 13>;
+  defm MVE_VCEQ : unpred_vcmp_r_reversible<"i", 0, 0>;
+  defm MVE_VCNE : unpred_vcmp_r_reversible<"i", 1, 1>;
+  defm MVE_VCGE : unpred_vcmp_r_reversible<"s", 10, 13>;
+  defm MVE_VCLT : unpred_vcmp_r_reversible<"s", 11, 12>;
+  defm MVE_VCGT : unpred_vcmp_r_reversible<"s", 12, 11>;
+  defm MVE_VCLE : unpred_vcmp_r_reversible<"s", 13, 10>;
   defm MVE_VCGTU : unpred_vcmp_r<"u", 8>;
   defm MVE_VCGEU : unpred_vcmp_r<"u", 2>;
 }
@@ -3361,12 +3396,12 @@
   defm MVE_VFCGTZ : unpred_vcmpf_z<12>;
   defm MVE_VFCLEZ : unpred_vcmpf_z<13>;

-  defm MVE_VFCEQ : unpred_vcmpf_r<0>;
-  defm MVE_VFCNE : unpred_vcmpf_r<1>;
-  defm MVE_VFCGE : unpred_vcmpf_r<10>;
-  defm MVE_VFCLT : unpred_vcmpf_r<11>;
-  defm MVE_VFCGT : unpred_vcmpf_r<12>;
-  defm MVE_VFCLE : unpred_vcmpf_r<13>;
+  defm MVE_VFCEQ : unpred_vcmpf_r<0, 0>;
+  defm MVE_VFCNE : unpred_vcmpf_r<1, 1>;
+  defm MVE_VFCGE : unpred_vcmpf_r<10, 13>;
+  defm MVE_VFCLT : unpred_vcmpf_r<11, 12>;
+  defm MVE_VFCGT : unpred_vcmpf_r<12, 11>;
+  defm MVE_VFCLE : unpred_vcmpf_r<13, 10>;
 }

diff --git a/llvm/test/CodeGen/Thumb2/mve-pred-and.ll b/llvm/test/CodeGen/Thumb2/mve-pred-and.ll
--- a/llvm/test/CodeGen/Thumb2/mve-pred-and.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-pred-and.ll
@@ -338,9 +338,8 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpsltr_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c) {
 ; CHECK-LABEL: cmpsltr_v4i1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vdup.32 q2, r0
 ; CHECK-NEXT:    vpt.i32 eq, q0, zr
-; CHECK-NEXT:    vcmpt.s32 gt, q2, q1
+; CHECK-NEXT:    vcmpt.s32 lt, q1, r0
 ; CHECK-NEXT:    vpsel q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
@@ -373,9 +372,8 @@
 define arm_aapcs_vfpcc <4 x i32> @cmpsler_v4i1(<4 x i32> %a, <4 x i32> %b, i32 %c) {
 ; CHECK-LABEL: cmpsler_v4i1:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vdup.32 q2, r0
 ; CHECK-NEXT:    vpt.i32 eq, q0, zr
-; CHECK-NEXT:    vcmpt.s32 ge, q2, q1
+; CHECK-NEXT:    vcmpt.s32 le, q1, r0
 ; CHECK-NEXT:    vpsel q0, q0, q1
 ; CHECK-NEXT:    bx lr
 entry:
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpfr.ll
@@ -111,8 +111,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_one_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
-; CHECK-MVEFP-NEXT:    vdup.32 q1, r0
-; CHECK-MVEFP-NEXT:    vpt.f32 le, q1, q0
+; CHECK-MVEFP-NEXT:    vpt.f32 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vpnot
 ; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
@@ -279,8 +278,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_olt_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
-; CHECK-MVEFP-NEXT:    vdup.32 q1, r0
-; CHECK-MVEFP-NEXT:    vcmp.f32 gt, q1, q0
+; CHECK-MVEFP-NEXT:    vcmp.f32 lt, q0, r0
 ; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
@@ -335,8 +333,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ole_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
-; CHECK-MVEFP-NEXT:    vdup.32 q1, r0
-; CHECK-MVEFP-NEXT:    vcmp.f32 ge, q1, q0
+; CHECK-MVEFP-NEXT:    vcmp.f32 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
@@ -401,8 +398,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ueq_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
-; CHECK-MVEFP-NEXT:    vdup.32 q1, r0
-; CHECK-MVEFP-NEXT:    vpt.f32 le, q1, q0
+; CHECK-MVEFP-NEXT:    vpt.f32 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
@@ -513,8 +509,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ugt_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
-; CHECK-MVEFP-NEXT:    vdup.32 q1, r0
-; CHECK-MVEFP-NEXT:    vcmp.f32 ge, q1, q0
+; CHECK-MVEFP-NEXT:    vcmp.f32 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vpnot
 ; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
@@ -570,8 +565,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_uge_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
-; CHECK-MVEFP-NEXT:    vdup.32 q1, r0
-; CHECK-MVEFP-NEXT:    vcmp.f32 gt, q1, q0
+; CHECK-MVEFP-NEXT:    vcmp.f32 lt, q0, r0
 ; CHECK-MVEFP-NEXT:    vpnot
 ; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
@@ -739,8 +733,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_ord_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
-; CHECK-MVEFP-NEXT:    vdup.32 q1, r0
-; CHECK-MVEFP-NEXT:    vpt.f32 le, q1, q0
+; CHECK-MVEFP-NEXT:    vpt.f32 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 lt, q0, r0
 ; CHECK-MVEFP-NEXT:    vpnot
 ; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
@@ -797,8 +790,7 @@
 ; CHECK-MVEFP-LABEL: vcmp_uno_v4f32:
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vmov r0, s4
-; CHECK-MVEFP-NEXT:    vdup.32 q1, r0
-; CHECK-MVEFP-NEXT:    vpt.f32 le, q1, q0
+; CHECK-MVEFP-NEXT:    vpt.f32 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f32 lt, q0, r0
 ; CHECK-MVEFP-NEXT:    vpsel q0, q2, q3
 ; CHECK-MVEFP-NEXT:    bx lr
@@ -1068,8 +1060,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vldr.16 s12, [r0]
 ; CHECK-MVEFP-NEXT:    vmov r0, s12
-; CHECK-MVEFP-NEXT:    vdup.16 q3, r0
-; CHECK-MVEFP-NEXT:    vpt.f16 le, q3, q0
+; CHECK-MVEFP-NEXT:    vpt.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vpnot
 ; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
@@ -1449,8 +1440,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vldr.16 s12, [r0]
 ; CHECK-MVEFP-NEXT:    vmov r0, s12
-; CHECK-MVEFP-NEXT:    vdup.16 q3, r0
-; CHECK-MVEFP-NEXT:    vcmp.f16 gt, q3, q0
+; CHECK-MVEFP-NEXT:    vcmp.f16 lt, q0, r0
 ; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
@@ -1576,8 +1566,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vldr.16 s12, [r0]
 ; CHECK-MVEFP-NEXT:    vmov r0, s12
-; CHECK-MVEFP-NEXT:    vdup.16 q3, r0
-; CHECK-MVEFP-NEXT:    vcmp.f16 ge, q3, q0
+; CHECK-MVEFP-NEXT:    vcmp.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT:    bx lr
 entry:
@@ -1719,8 +1708,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vldr.16 s12, [r0]
 ; CHECK-MVEFP-NEXT:    vmov r0, s12
-; CHECK-MVEFP-NEXT:    vdup.16 q3, r0
-; CHECK-MVEFP-NEXT:    vpt.f16 le, q3, q0
+; CHECK-MVEFP-NEXT:    vpt.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT:    bx lr
@@ -1973,8 +1961,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vldr.16 s12, [r0]
 ; CHECK-MVEFP-NEXT:    vmov r0, s12
-; CHECK-MVEFP-NEXT:    vdup.16 q3, r0
-; CHECK-MVEFP-NEXT:    vcmp.f16 ge, q3, q0
+; CHECK-MVEFP-NEXT:    vcmp.f16 le, q0, r0
 ; CHECK-MVEFP-NEXT:    vpnot
 ; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT:    bx lr
@@ -2101,8 +2088,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vldr.16 s12, [r0]
 ; CHECK-MVEFP-NEXT:    vmov r0, s12
-; CHECK-MVEFP-NEXT:    vdup.16 q3, r0
-; CHECK-MVEFP-NEXT:    vcmp.f16 gt, q3, q0
+; CHECK-MVEFP-NEXT:    vcmp.f16 lt, q0, r0
 ; CHECK-MVEFP-NEXT:    vpnot
 ; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT:    bx lr
@@ -2483,8 +2469,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vldr.16 s12, [r0]
 ; CHECK-MVEFP-NEXT:    vmov r0, s12
-; CHECK-MVEFP-NEXT:    vdup.16 q3, r0
-; CHECK-MVEFP-NEXT:    vpt.f16 le, q3, q0
+; CHECK-MVEFP-NEXT:    vpt.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 lt, q0, r0
 ; CHECK-MVEFP-NEXT:    vpnot
 ; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
@@ -2612,8 +2597,7 @@
 ; CHECK-MVEFP:       @ %bb.0: @ %entry
 ; CHECK-MVEFP-NEXT:    vldr.16 s12, [r0]
 ; CHECK-MVEFP-NEXT:    vmov r0, s12
-; CHECK-MVEFP-NEXT:    vdup.16 q3, r0
-; CHECK-MVEFP-NEXT:    vpt.f16 le, q3, q0
+; CHECK-MVEFP-NEXT:    vpt.f16 ge, q0, r0
 ; CHECK-MVEFP-NEXT:    vcmpt.f16 lt, q0, r0
 ; CHECK-MVEFP-NEXT:    vpsel q0, q1, q2
 ; CHECK-MVEFP-NEXT:    bx lr
diff --git a/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll b/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll
--- a/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vcmpr.ll
@@ -60,8 +60,7 @@
 define arm_aapcs_vfpcc <4 x i32> @vcmp_slt_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vcmp_slt_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vdup.32 q3, r0
-; CHECK-NEXT:    vcmp.s32 gt, q3, q0
+; CHECK-NEXT:    vcmp.s32 lt, q0, r0
 ; CHECK-NEXT:    vpsel q0, q1, q2
 ; CHECK-NEXT:    bx lr
 entry:
@@ -75,8 +74,7 @@
 define arm_aapcs_vfpcc <4 x i32> @vcmp_sle_v4i32(<4 x i32> %src, i32 %src2, <4 x i32> %a, <4 x i32> %b) {
 ; CHECK-LABEL: vcmp_sle_v4i32:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vdup.32 q3, r0
-; CHECK-NEXT:    vcmp.s32 ge, q3, q0
+; CHECK-NEXT:    vcmp.s32 le, q0, r0
 ; CHECK-NEXT:    vpsel q0, q1, q2
 ; CHECK-NEXT:    bx lr
 entry:
@@ -205,8 +203,7 @@
 define arm_aapcs_vfpcc <8 x i16> @vcmp_slt_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: vcmp_slt_v8i16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vdup.16 q3, r0
-; CHECK-NEXT:    vcmp.s16 gt, q3, q0
+; CHECK-NEXT:    vcmp.s16 lt, q0, r0
 ; CHECK-NEXT:    vpsel q0, q1, q2
 ; CHECK-NEXT:    bx lr
 entry:
@@ -220,8 +217,7 @@
 define arm_aapcs_vfpcc <8 x i16> @vcmp_sle_v8i16(<8 x i16> %src, i16 %src2, <8 x i16> %a, <8 x i16> %b) {
 ; CHECK-LABEL: vcmp_sle_v8i16:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vdup.16 q3, r0
-; CHECK-NEXT:    vcmp.s16 ge, q3, q0
+; CHECK-NEXT:    vcmp.s16 le, q0, r0
 ; CHECK-NEXT:    vpsel q0, q1, q2
 ; CHECK-NEXT:    bx lr
 entry:
@@ -350,8 +346,7 @@
 define arm_aapcs_vfpcc <16 x i8> @vcmp_slt_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: vcmp_slt_v16i8:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vdup.8 q3, r0
-; CHECK-NEXT:    vcmp.s8 gt, q3, q0
+; CHECK-NEXT:    vcmp.s8 lt, q0, r0
 ; CHECK-NEXT:    vpsel q0, q1, q2
 ; CHECK-NEXT:    bx lr
 entry:
@@ -365,8 +360,7 @@
 define arm_aapcs_vfpcc <16 x i8> @vcmp_sle_v16i8(<16 x i8> %src, i8 %src2, <16 x i8> %a, <16 x i8> %b) {
 ; CHECK-LABEL: vcmp_sle_v16i8:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vdup.8 q3, r0
-; CHECK-NEXT:    vcmp.s8 ge, q3, q0
+; CHECK-NEXT:    vcmp.s8 le, q0, r0
 ; CHECK-NEXT:    vpsel q0, q1, q2
 ; CHECK-NEXT:    bx lr
 entry:
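
For reference, a minimal IR sketch of the shape the new reversed patterns match (function and value names here are illustrative, not taken from the tests above): a comparison whose splatted scalar operand sits in the first operand of the icmp.

define arm_aapcs_vfpcc <4 x i32> @scalar_on_lhs(<4 x i32> %v, i32 %s, <4 x i32> %a, <4 x i32> %b) {
entry:
  ; The insertelement/shufflevector splat idiom becomes an ARMvdup node
  ; during selection.
  %ins = insertelement <4 x i32> undef, i32 %s, i32 0
  %splat = shufflevector <4 x i32> %ins, <4 x i32> undef, <4 x i32> zeroinitializer
  ; splat(%s) > %v is equivalent to %v < %s, so GT (12) is emitted as the
  ; reversed condition LT (11) with the operands swapped.
  %cmp = icmp sgt <4 x i32> %splat, %v
  %sel = select <4 x i1> %cmp, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %sel
}

Assuming %v arrives in q0 and %s in r0 (as in vcmp_slt_v4i32 above), this previously needed a vdup of r0 into a spare q register followed by vcmp.s32 gt against it; with unpred_vcmp_r_reversible<"s", 12, 11> it selects directly to vcmp.s32 lt, q0, r0. EQ (0) and NE (1) reverse to themselves, GE (10) pairs with LE (13), and the unsigned CS and HI comparisons are left out because they have no reversed forms.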