diff --git a/llvm/lib/Target/ARM/ARMInstrInfo.td b/llvm/lib/Target/ARM/ARMInstrInfo.td --- a/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -297,6 +297,23 @@ string Constraints = C; } +// ARMCC condition codes. See ARMCC::CondCodes +def ARMCCeq : PatLeaf<(i32 0)>; +def ARMCCne : PatLeaf<(i32 1)>; +def ARMCChs : PatLeaf<(i32 2)>; +def ARMCClo : PatLeaf<(i32 3)>; +def ARMCCmi : PatLeaf<(i32 4)>; +def ARMCCpl : PatLeaf<(i32 5)>; +def ARMCCvs : PatLeaf<(i32 6)>; +def ARMCCvc : PatLeaf<(i32 7)>; +def ARMCChi : PatLeaf<(i32 8)>; +def ARMCCls : PatLeaf<(i32 9)>; +def ARMCCge : PatLeaf<(i32 10)>; +def ARMCClt : PatLeaf<(i32 11)>; +def ARMCCgt : PatLeaf<(i32 12)>; +def ARMCCle : PatLeaf<(i32 13)>; +def ARMCCal : PatLeaf<(i32 14)>; + //===----------------------------------------------------------------------===// // ARM specific transformation functions and pattern fragments. // diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1979,17 +1979,17 @@ // the following vectorized expression (r being the value in $reg): // r > 0 ? r : (r == INT_MIN ? INT_MAX : -r) def : Pat<(VTI.Vec (vselect - (VTI.Pred (ARMvcmpz (VTI.Vec MQPR:$reg), (i32 12))), + (VTI.Pred (ARMvcmpz (VTI.Vec MQPR:$reg), ARMCCgt)), (VTI.Vec MQPR:$reg), (VTI.Vec (vselect - (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, (i32 0))), + (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, ARMCCeq)), int_max, (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))))), (VTI.Vec (vqabs_instruction (VTI.Vec MQPR:$reg)))>; // Similarly, this tree represents vqneg, i.e. the following vectorized expression: // r == INT_MIN ? INT_MAX : -r def : Pat<(VTI.Vec (vselect - (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, (i32 0))), + (VTI.Pred (ARMvcmp (VTI.Vec MQPR:$reg), int_min, ARMCCeq)), int_max, (sub (VTI.Vec zero_vec), (VTI.Vec MQPR:$reg)))), (VTI.Vec (vqneg_instruction (VTI.Vec MQPR:$reg)))>; @@ -3346,155 +3346,120 @@ def MVE_VCMPs16r : MVE_VCMPqrs<"s16", 0b01>; def MVE_VCMPs32r : MVE_VCMPqrs<"s32", 0b10>; -multiclass unpred_vcmp_z { - def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))), +multiclass unpred_vcmp_z { + def i8 : Pat<(v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)), (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc))>; - def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))), + def i16 : Pat<(v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)), (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc))>; - def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))), + def i32 : Pat<(v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)), (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc))>; - def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), (i32 fc))))), + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmpz (v16i8 MQPR:$v1), fc)))), (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), (i32 fc))))), + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8i16 MQPR:$v1), fc)))), (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), (i32 fc))))), + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4i32 MQPR:$v1), fc)))), (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; } -multiclass unpred_vcmp_r { - def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))), +multiclass unpred_vcmp_r { + def i8 : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)), (v16i1 (!cast("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc))>; - def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))), + def i16 : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)), (v8i1 (!cast("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc))>; - def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))), + def i32 : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)), (v4i1 (!cast("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc))>; - def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))), + def i8r : Pat<(v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)), (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc))>; - def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))), + def i16r : Pat<(v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)), (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc))>; - def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))), + def i32r : Pat<(v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)), (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc))>; - def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), (i32 fc))))), + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc)))), (v16i1 (!cast("MVE_VCMP"#suffix#"8") (v16i8 MQPR:$v1), (v16i8 MQPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), (i32 fc))))), + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc)))), (v8i1 (!cast("MVE_VCMP"#suffix#"16") (v8i16 MQPR:$v1), (v8i16 MQPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), (i32 fc))))), + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc)))), (v4i1 (!cast("MVE_VCMP"#suffix#"32") (v4i32 MQPR:$v1), (v4i32 MQPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), (i32 fc))))), + def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 MQPR:$v1), (v16i8 (ARMvdup GPR:$v2)), fc)))), (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), (i32 fc))))), + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 MQPR:$v1), (v8i16 (ARMvdup GPR:$v2)), fc)))), (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), (i32 fc))))), + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 MQPR:$v1), (v4i32 (ARMvdup GPR:$v2)), fc)))), (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v1), (i32 GPR:$v2), fc, 1, VCCR:$p1))>; } -multiclass unpred_vcmp_r_reversible { - defm "": unpred_vcmp_r; - - // Additional patterns that match the vector/scalar comparisons the - // opposite way round, with the ARMvdup in the first operand of the - // ARMvcmp. These will usually need a different condition code - // (except for the symmetric conditions EQ and NE). They're in a - // separate multiclass because the unsigned CS and HI comparisons - // don't have reversed forms. - - def : Pat<(v16i1 (ARMvcmp (v16i8 (ARMvdup GPR:$v1)), (v16i8 MQPR:$v2), (i32 fc))), - (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v2), (i32 GPR:$v1), fcReversed))>; - def : Pat<(v8i1 (ARMvcmp (v8i16 (ARMvdup GPR:$v1)), (v8i16 MQPR:$v2), (i32 fc))), - (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v2), (i32 GPR:$v1), fcReversed))>; - def : Pat<(v4i1 (ARMvcmp (v4i32 (ARMvdup GPR:$v1)), (v4i32 MQPR:$v2), (i32 fc))), - (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v2), (i32 GPR:$v1), fcReversed))>; - - def : Pat<(v16i1 (and (v16i1 VCCR:$p1), (v16i1 (ARMvcmp (v16i8 (ARMvdup GPR:$v1)), (v16i8 MQPR:$v2), (i32 fc))))), - (v16i1 (!cast("MVE_VCMP"#suffix#"8r") (v16i8 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8i16 (ARMvdup GPR:$v1)), (v8i16 MQPR:$v2), (i32 fc))))), - (v8i1 (!cast("MVE_VCMP"#suffix#"16r") (v8i16 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4i32 (ARMvdup GPR:$v1)), (v4i32 MQPR:$v2), (i32 fc))))), - (v4i1 (!cast("MVE_VCMP"#suffix#"32r") (v4i32 MQPR:$v2), (i32 GPR:$v1), fcReversed, 1, VCCR:$p1))>; -} - -multiclass unpred_vcmpf_z { - def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))), +multiclass unpred_vcmpf_z { + def f16 : Pat<(v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)), (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, fc))>; - def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))), + def f32 : Pat<(v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)), (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), (i32 fc))))), + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmpz (v8f16 MQPR:$v1), fc)))), (v8i1 (MVE_VCMPf32r (v8f16 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), (i32 fc))))), + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmpz (v4f32 MQPR:$v1), fc)))), (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, fc, 1, VCCR:$p1))>; } -multiclass unpred_vcmpf_r { - def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))), +multiclass unpred_vcmpf_r { + def f16 : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)), (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc))>; - def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))), + def f32 : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)), (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc))>; - def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))), + def f16r : Pat<(v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)), (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc))>; - def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))), + def f32r : Pat<(v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)), (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc))>; - def : Pat<(v8i1 (ARMvcmp (v8f16 (ARMvdup HPR:$v1)), (v8f16 MQPR:$v2), (i32 fc))), - (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f16 HPR:$v1), rGPR)), fcReversed))>; - def : Pat<(v4i1 (ARMvcmp (v4f32 (ARMvdup SPR:$v1)), (v4f32 MQPR:$v2), (i32 fc))), - (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f32 SPR:$v1), rGPR)), fcReversed))>; - - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), (i32 fc))))), + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc)))), (v8i1 (MVE_VCMPf16 (v8f16 MQPR:$v1), (v8f16 MQPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), (i32 fc))))), + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc)))), (v4i1 (MVE_VCMPf32 (v4f32 MQPR:$v1), (v4f32 MQPR:$v2), fc, 1, VCCR:$p1))>; - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), (i32 fc))))), + def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 MQPR:$v1), (v8f16 (ARMvdup HPR:$v2)), fc)))), (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f16 HPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), (i32 fc))))), + def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 MQPR:$v1), (v4f32 (ARMvdup SPR:$v2)), fc)))), (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), (i32 (COPY_TO_REGCLASS (f32 SPR:$v2), rGPR)), fc, 1, VCCR:$p1))>; - - def : Pat<(v8i1 (and (v8i1 VCCR:$p1), (v8i1 (ARMvcmp (v8f16 (ARMvdup HPR:$v1)), (v8f16 MQPR:$v2), (i32 fc))))), - (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f16 HPR:$v1), rGPR)), fcReversed, 1, VCCR:$p1))>; - def : Pat<(v4i1 (and (v4i1 VCCR:$p1), (v4i1 (ARMvcmp (v4f32 (ARMvdup SPR:$v1)), (v4f32 MQPR:$v2), (i32 fc))))), - (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v2), (i32 (COPY_TO_REGCLASS (f32 SPR:$v1), rGPR)), fcReversed, 1, VCCR:$p1))>; } let Predicates = [HasMVEInt] in { - defm MVE_VCEQZ : unpred_vcmp_z<"i", 0>; - defm MVE_VCNEZ : unpred_vcmp_z<"i", 1>; - defm MVE_VCGEZ : unpred_vcmp_z<"s", 10>; - defm MVE_VCLTZ : unpred_vcmp_z<"s", 11>; - defm MVE_VCGTZ : unpred_vcmp_z<"s", 12>; - defm MVE_VCLEZ : unpred_vcmp_z<"s", 13>; - defm MVE_VCGTUZ : unpred_vcmp_z<"u", 8>; - defm MVE_VCGEUZ : unpred_vcmp_z<"u", 2>; - - defm MVE_VCEQ : unpred_vcmp_r_reversible<"i", 0, 0>; - defm MVE_VCNE : unpred_vcmp_r_reversible<"i", 1, 1>; - defm MVE_VCGE : unpred_vcmp_r_reversible<"s", 10, 13>; - defm MVE_VCLT : unpred_vcmp_r_reversible<"s", 11, 12>; - defm MVE_VCGT : unpred_vcmp_r_reversible<"s", 12, 11>; - defm MVE_VCLE : unpred_vcmp_r_reversible<"s", 13, 10>; - defm MVE_VCGTU : unpred_vcmp_r<"u", 8>; - defm MVE_VCGEU : unpred_vcmp_r<"u", 2>; + defm MVE_VCEQZ : unpred_vcmp_z<"i", ARMCCeq>; + defm MVE_VCNEZ : unpred_vcmp_z<"i", ARMCCne>; + defm MVE_VCGEZ : unpred_vcmp_z<"s", ARMCCge>; + defm MVE_VCLTZ : unpred_vcmp_z<"s", ARMCClt>; + defm MVE_VCGTZ : unpred_vcmp_z<"s", ARMCCgt>; + defm MVE_VCLEZ : unpred_vcmp_z<"s", ARMCCle>; + defm MVE_VCGTUZ : unpred_vcmp_z<"u", ARMCChi>; + defm MVE_VCGEUZ : unpred_vcmp_z<"u", ARMCChs>; + + defm MVE_VCEQ : unpred_vcmp_r<"i", ARMCCeq>; + defm MVE_VCNE : unpred_vcmp_r<"i", ARMCCne>; + defm MVE_VCGE : unpred_vcmp_r<"s", ARMCCge>; + defm MVE_VCLT : unpred_vcmp_r<"s", ARMCClt>; + defm MVE_VCGT : unpred_vcmp_r<"s", ARMCCgt>; + defm MVE_VCLE : unpred_vcmp_r<"s", ARMCCle>; + defm MVE_VCGTU : unpred_vcmp_r<"u", ARMCChi>; + defm MVE_VCGEU : unpred_vcmp_r<"u", ARMCChs>; } let Predicates = [HasMVEFloat] in { - defm MVE_VFCEQZ : unpred_vcmpf_z<0>; - defm MVE_VFCNEZ : unpred_vcmpf_z<1>; - defm MVE_VFCGEZ : unpred_vcmpf_z<10>; - defm MVE_VFCLTZ : unpred_vcmpf_z<11>; - defm MVE_VFCGTZ : unpred_vcmpf_z<12>; - defm MVE_VFCLEZ : unpred_vcmpf_z<13>; + defm MVE_VFCEQZ : unpred_vcmpf_z; + defm MVE_VFCNEZ : unpred_vcmpf_z; + defm MVE_VFCGEZ : unpred_vcmpf_z; + defm MVE_VFCLTZ : unpred_vcmpf_z; + defm MVE_VFCGTZ : unpred_vcmpf_z; + defm MVE_VFCLEZ : unpred_vcmpf_z; - defm MVE_VFCEQ : unpred_vcmpf_r<0, 0>; - defm MVE_VFCNE : unpred_vcmpf_r<1, 1>; - defm MVE_VFCGE : unpred_vcmpf_r<10, 13>; - defm MVE_VFCLT : unpred_vcmpf_r<11, 12>; - defm MVE_VFCGT : unpred_vcmpf_r<12, 11>; - defm MVE_VFCLE : unpred_vcmpf_r<13, 10>; + defm MVE_VFCEQ : unpred_vcmpf_r; + defm MVE_VFCNE : unpred_vcmpf_r; + defm MVE_VFCGE : unpred_vcmpf_r; + defm MVE_VFCLT : unpred_vcmpf_r; + defm MVE_VFCGT : unpred_vcmpf_r; + defm MVE_VFCLE : unpred_vcmpf_r; } @@ -5174,20 +5139,20 @@ def : Pat<(v16i8 (vselect (v16i8 MQPR:$pred), (v16i8 MQPR:$v1), (v16i8 MQPR:$v2))), (v16i8 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), 1)))>; + (MVE_VCMPi8 (v16i8 MQPR:$pred), (MVE_VMOVimmi8 0), ARMCCne)))>; def : Pat<(v8i16 (vselect (v8i16 MQPR:$pred), (v8i16 MQPR:$v1), (v8i16 MQPR:$v2))), (v8i16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>; + (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>; def : Pat<(v4i32 (vselect (v4i32 MQPR:$pred), (v4i32 MQPR:$v1), (v4i32 MQPR:$v2))), (v4i32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>; + (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>; def : Pat<(v8f16 (vselect (v8i16 MQPR:$pred), (v8f16 MQPR:$v1), (v8f16 MQPR:$v2))), (v8f16 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), 1)))>; + (MVE_VCMPi16 (v8i16 MQPR:$pred), (MVE_VMOVimmi16 0), ARMCCne)))>; def : Pat<(v4f32 (vselect (v4i32 MQPR:$pred), (v4f32 MQPR:$v1), (v4f32 MQPR:$v2))), (v4f32 (MVE_VPSEL MQPR:$v1, MQPR:$v2, 0, - (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), 1)))>; + (MVE_VCMPi32 (v4i32 MQPR:$pred), (MVE_VMOVimmi32 0), ARMCCne)))>; // Pred <-> Int def : Pat<(v16i8 (zext (v16i1 VCCR:$pred))), @@ -5212,11 +5177,11 @@ (v4i32 (MVE_VPSEL (MVE_VMOVimmi32 1), (MVE_VMOVimmi32 0), 0, VCCR:$pred))>; def : Pat<(v16i1 (trunc (v16i8 MQPR:$v1))), - (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, 1))>; + (v16i1 (MVE_VCMPi32r (v16i8 MQPR:$v1), ZR, ARMCCne))>; def : Pat<(v8i1 (trunc (v8i16 MQPR:$v1))), - (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, 1))>; + (v8i1 (MVE_VCMPi32r (v8i16 MQPR:$v1), ZR, ARMCCne))>; def : Pat<(v4i1 (trunc (v4i32 MQPR:$v1))), - (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, 1))>; + (v4i1 (MVE_VCMPi32r (v4i32 MQPR:$v1), ZR, ARMCCne))>; } let Predicates = [HasMVEFloat] in { @@ -5235,13 +5200,13 @@ (v8f16 (MVE_VPSEL (v8f16 (MVE_VMOVimmi16 2748)), (v8f16 (MVE_VMOVimmi16 0)), 0, VCCR:$pred))>; def : Pat<(v4i1 (fp_to_uint (v4f32 MQPR:$v1))), - (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>; + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>; def : Pat<(v8i1 (fp_to_uint (v8f16 MQPR:$v1))), - (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>; + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>; def : Pat<(v4i1 (fp_to_sint (v4f32 MQPR:$v1))), - (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, 1))>; + (v4i1 (MVE_VCMPf32r (v4f32 MQPR:$v1), ZR, ARMCCne))>; def : Pat<(v8i1 (fp_to_sint (v8f16 MQPR:$v1))), - (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, 1))>; + (v8i1 (MVE_VCMPf16r (v8f16 MQPR:$v1), ZR, ARMCCne))>; } def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary, diff --git a/llvm/lib/Target/ARM/ARMInstrNEON.td b/llvm/lib/Target/ARM/ARMInstrNEON.td --- a/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -3314,30 +3314,30 @@ // source operand element sizes of 8, 16 and 32 bits: multiclass N2V_QHS_cmp op24_23, bits<2> op21_20, bits<2> op17_16, bits<5> op11_7, bit op4, string opc, string Dt, - string asm, int fc> { + string asm, PatFrag fc> { // 64-bit vector types. def v8i8 : N2V; + [(set DPR:$Vd, (v8i8 (ARMvcmpz (v8i8 DPR:$Vm), fc)))]>; def v4i16 : N2V; + [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4i16 DPR:$Vm), fc)))]>; def v2i32 : N2V; + [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2i32 DPR:$Vm), fc)))]>; def v2f32 : N2V { + [(set DPR:$Vd, (v2i32 (ARMvcmpz (v2f32 DPR:$Vm), fc)))]> { let Inst{10} = 1; // overwrite F = 1 } def v4f16 : N2V, + [(set DPR:$Vd, (v4i16 (ARMvcmpz (v4f16 DPR:$Vm), fc)))]>, Requires<[HasNEON,HasFullFP16]> { let Inst{10} = 1; // overwrite F = 1 } @@ -3346,25 +3346,25 @@ def v16i8 : N2V; + [(set QPR:$Vd, (v16i8 (ARMvcmpz (v16i8 QPR:$Vm), fc)))]>; def v8i16 : N2V; + [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8i16 QPR:$Vm), fc)))]>; def v4i32 : N2V; + [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4i32 QPR:$Vm), fc)))]>; def v4f32 : N2V { + [(set QPR:$Vd, (v4i32 (ARMvcmpz (v4f32 QPR:$Vm), fc)))]> { let Inst{10} = 1; // overwrite F = 1 } def v8f16 : N2V, + [(set QPR:$Vd, (v8i16 (ARMvcmpz (v8f16 QPR:$Vm), fc)))]>, Requires<[HasNEON,HasFullFP16]> { let Inst{10} = 1; // overwrite F = 1 } @@ -3373,11 +3373,11 @@ // Neon 3-register comparisons. class N3VQ_cmp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, int fc, bit Commutable> + ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable> : N3V { + [(set QPR:$Vd, (ResTy (ARMvcmp (OpTy QPR:$Vn), (OpTy QPR:$Vm), fc)))]> { // All of these have a two-operand InstAlias. let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; @@ -3385,11 +3385,11 @@ class N3VD_cmp op21_20, bits<4> op11_8, bit op4, InstrItinClass itin, string OpcodeStr, string Dt, - ValueType ResTy, ValueType OpTy, int fc, bit Commutable> + ValueType ResTy, ValueType OpTy, PatFrag fc, bit Commutable> : N3V { + [(set DPR:$Vd, (ResTy (ARMvcmp (OpTy DPR:$Vn), (OpTy DPR:$Vm), fc)))]> { // All of these have a two-operand InstAlias. let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; @@ -3399,7 +3399,7 @@ InstrItinClass itinD16, InstrItinClass itinD32, InstrItinClass itinQ16, InstrItinClass itinQ32, string OpcodeStr, string Dt, - int fc, bit Commutable = 0> { + PatFrag fc, bit Commutable = 0> { // 64-bit vector types. def v8i8 : N3VD_cmp; + IIC_VSUBi4Q, "vceq", "i", ARMCCeq, 1>; def VCEQfd : N3VD_cmp<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32, - 0, 1>; + ARMCCeq, 1>; def VCEQfq : N3VQ_cmp<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32, - 0, 1>; + ARMCCeq, 1>; def VCEQhd : N3VD_cmp<0,0,0b01,0b1110,0, IIC_VBIND, "vceq", "f16", v4i16, v4f16, - 0, 1>, + ARMCCeq, 1>, Requires<[HasNEON, HasFullFP16]>; def VCEQhq : N3VQ_cmp<0,0,0b01,0b1110,0, IIC_VBINQ, "vceq", "f16", v8i16, v8f16, - 0, 1>, + ARMCCeq, 1>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i", - "$Vd, $Vm, #0", 0>; + "$Vd, $Vm, #0", ARMCCeq>; // VCGE : Vector Compare Greater Than or Equal defm VCGEs : N3V_QHS_cmp<0, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "s", 10, 0>; + IIC_VSUBi4Q, "vcge", "s", ARMCCge, 0>; defm VCGEu : N3V_QHS_cmp<1, 0, 0b0011, 1, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcge", "u", 2, 0>; + IIC_VSUBi4Q, "vcge", "u", ARMCChs, 0>; def VCGEfd : N3VD_cmp<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32", v2i32, v2f32, - 10, 0>; + ARMCCge, 0>; def VCGEfq : N3VQ_cmp<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32, - 10, 0>; + ARMCCge, 0>; def VCGEhd : N3VD_cmp<1,0,0b01,0b1110,0, IIC_VBIND, "vcge", "f16", v4i16, v4f16, - 10, 0>, + ARMCCge, 0>, Requires<[HasNEON, HasFullFP16]>; def VCGEhq : N3VQ_cmp<1,0,0b01,0b1110,0, IIC_VBINQ, "vcge", "f16", v8i16, v8f16, - 10, 0>, + ARMCCge, 0>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s", - "$Vd, $Vm, #0", 10>; + "$Vd, $Vm, #0", ARMCCge>; defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s", - "$Vd, $Vm, #0", 13>; + "$Vd, $Vm, #0", ARMCCle>; } // VCGT : Vector Compare Greater Than defm VCGTs : N3V_QHS_cmp<0, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "s", 12, 0>; + IIC_VSUBi4Q, "vcgt", "s", ARMCCgt, 0>; defm VCGTu : N3V_QHS_cmp<1, 0, 0b0011, 0, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, - IIC_VSUBi4Q, "vcgt", "u", 8, 0>; + IIC_VSUBi4Q, "vcgt", "u", ARMCChi, 0>; def VCGTfd : N3VD_cmp<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32, - 12, 0>; + ARMCCgt, 0>; def VCGTfq : N3VQ_cmp<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32, - 12, 0>; + ARMCCgt, 0>; def VCGThd : N3VD_cmp<1,0,0b11,0b1110,0, IIC_VBIND, "vcgt", "f16", v4i16, v4f16, - 12, 0>, + ARMCCgt, 0>, Requires<[HasNEON, HasFullFP16]>; def VCGThq : N3VQ_cmp<1,0,0b11,0b1110,0, IIC_VBINQ, "vcgt", "f16", v8i16, v8f16, - 12, 0>, + ARMCCgt, 0>, Requires<[HasNEON, HasFullFP16]>; let TwoOperandAliasConstraint = "$Vm = $Vd" in { defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s", - "$Vd, $Vm, #0", 12>; + "$Vd, $Vm, #0", ARMCCgt>; defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s", - "$Vd, $Vm, #0", 11>; + "$Vd, $Vm, #0", ARMCClt>; } // VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)