diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td --- a/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -408,6 +408,7 @@ // in an IT block). bit thumbArithFlagSetting = 0; + bits<2> VecSize = 0; bit validForTailPredication = 0; bit retainsPreviousHalfElement = 0; bit horizontalReduction = 0; @@ -428,6 +429,7 @@ let TSFlags{21} = retainsPreviousHalfElement; let TSFlags{22} = horizontalReduction; let TSFlags{23} = doubleWidthResult; + let TSFlags{25-24} = VecSize; let Constraints = cstr; let Itinerary = itin; diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -393,12 +393,13 @@ // --------- Start of base classes for the instructions themselves class MVE_MI pattern> + string ops, string cstr, bits<2> vecsize, list pattern> : Thumb2XI, Requires<[HasMVEInt]> { let D = MVEDomain; let DecoderNamespace = "MVE"; + let VecSize = vecsize; } // MVE_p is used for most predicated instructions, to add the cluster @@ -406,22 +407,22 @@ // the input predicate register. class MVE_p pattern=[]> + bits<2> vecsize, list pattern=[]> : MVE_MI { + ops, !strconcat(cstr, vpred.vpred_constraint), vecsize, pattern> { let Inst{31-29} = 0b111; let Inst{27-26} = 0b11; } class MVE_f pattern=[]> - : MVE_p { + bits<2> vecsize, list pattern=[]> + : MVE_p { let Predicates = [HasMVEFloat]; } @@ -599,11 +600,11 @@ class MVE_rDest pattern=[]> + string ops, string cstr, bits<2> vecsize, list pattern=[]> // Always use vpred_n and not vpred_r: with the output register being // a GPR and not a vector register, there can't be any question of // what to put in its inactive lanes. - : MVE_p { + : MVE_p { let Inst{25-23} = 0b101; let Inst{11-9} = 0b111; @@ -613,7 +614,7 @@ class MVE_VABAV size> : MVE_rDest<(outs rGPR:$Rda), (ins rGPR:$Rda_src, MQPR:$Qn, MQPR:$Qm), NoItinerary, "vabav", suffix, "$Rda, $Qn, $Qm", "$Rda = $Rda_src", - []> { + size, []> { bits<4> Qm; bits<4> Qn; bits<4> Rda; @@ -666,7 +667,7 @@ class MVE_VADDV size, list pattern=[]> : MVE_rDest<(outs tGPREven:$Rda), iops, NoItinerary, - iname, suffix, "$Rda, $Qm", cstr, pattern> { + iname, suffix, "$Rda, $Qm", cstr, size, pattern> { bits<3> Qm; bits<4> Rda; @@ -764,7 +765,7 @@ class MVE_VADDLV pattern=[]> : MVE_rDest<(outs tGPREven:$RdaLo, tGPROdd:$RdaHi), iops, NoItinerary, iname, - suffix, "$RdaLo, $RdaHi, $Qm", cstr, pattern> { + suffix, "$RdaLo, $RdaHi, $Qm", cstr, 0b10, pattern> { bits<3> Qm; bits<4> RdaLo; bits<4> RdaHi; @@ -836,7 +837,7 @@ bit bit_17, bit bit_7, list pattern=[]> : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm), NoItinerary, iname, suffix, "$RdaSrc, $Qm", - "$RdaDest = $RdaSrc", pattern> { + "$RdaDest = $RdaSrc", !if(sz, 0b01, 0b10), pattern> { bits<3> Qm; bits<4> RdaDest; @@ -897,7 +898,7 @@ class MVE_VMINMAXV size, bit bit_17, bit bit_7, list pattern=[]> : MVE_rDest<(outs rGPR:$RdaDest), (ins rGPR:$RdaSrc, MQPR:$Qm), NoItinerary, - iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", pattern> { + iname, suffix, "$RdaSrc, $Qm", "$RdaDest = $RdaSrc", size, pattern> { bits<3> Qm; bits<4> RdaDest; @@ -1020,9 +1021,10 @@ defm MVE_VMAXAV : MVE_VMINMAXAV_ty<"vmaxav", 0, "int_arm_mve_maxav">; class MVE_VMLAMLSDAV + bit sz, bit bit_28, bit A, bit X, bit bit_8, bit bit_0, + bits<2> vecsize> : MVE_rDest<(outs tGPREven:$RdaDest), iops, NoItinerary, iname, suffix, - "$RdaDest, $Qn, $Qm", cstr, []> { + "$RdaDest, $Qn, $Qm", cstr, vecsize, []> { 
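+  // Note: the vecsize operand is not encoded in the instruction itself; it only
+  // feeds the VecSize field (TSFlags{25-24}) defined in ARMInstrFormats.td.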
bits<4> RdaDest; bits<3> Qm; bits<3> Qn; @@ -1050,11 +1052,11 @@ bit sz, bit bit_28, bit X, bit bit_8, bit bit_0> { def ""#x#VTI.Suffix : MVE_VMLAMLSDAV; + sz, bit_28, 0b0, X, bit_8, bit_0, VTI.Size>; def "a"#x#VTI.Suffix : MVE_VMLAMLSDAV; + sz, bit_28, 0b1, X, bit_8, bit_0, VTI.Size>; let Predicates = [HasMVEInt] in { def : Pat<(i32 (int_arm_mve_vmldava (i32 VTI.Unsigned), @@ -1255,9 +1257,9 @@ // Base class for VMLALDAV and VMLSLDAV, VRMLALDAVH, VRMLSLDAVH class MVE_VMLALDAVBase pattern=[]> + bits<2> vecsize, list pattern=[]> : MVE_rDest<(outs tGPREven:$RdaLoDest, tGPROdd:$RdaHiDest), iops, NoItinerary, - iname, suffix, "$RdaLoDest, $RdaHiDest, $Qn, $Qm", cstr, pattern> { + iname, suffix, "$RdaLoDest, $RdaHiDest, $Qn, $Qm", cstr, vecsize, pattern> { bits<4> RdaLoDest; bits<4> RdaHiDest; bits<3> Qm; @@ -1285,35 +1287,35 @@ } multiclass MVE_VMLALDAVBase_A pattern=[]> { + bit sz, bit bit_28, bit X, bit bit_8, bit bit_0, + bits<2> vecsize, list pattern=[]> { def ""#x#suffix : MVE_VMLALDAVBase< iname # x, suffix, (ins MQPR:$Qn, MQPR:$Qm), "", - sz, bit_28, 0b0, X, bit_8, bit_0, pattern>; + sz, bit_28, 0b0, X, bit_8, bit_0, vecsize, pattern>; def "a"#x#suffix : MVE_VMLALDAVBase< iname # "a" # x, suffix, (ins tGPREven:$RdaLoSrc, tGPROdd:$RdaHiSrc, MQPR:$Qn, MQPR:$Qm), "$RdaLoDest = $RdaLoSrc,$RdaHiDest = $RdaHiSrc", - sz, bit_28, 0b1, X, bit_8, bit_0, pattern>; + sz, bit_28, 0b1, X, bit_8, bit_0, vecsize, pattern>; } multiclass MVE_VMLALDAVBase_AX pattern=[]> { + bit bit_8, bit bit_0, bits<2> vecsize, list pattern=[]> { defm "" : MVE_VMLALDAVBase_A; + bit_28, 0b0, bit_8, bit_0, vecsize, pattern>; defm "" : MVE_VMLALDAVBase_A; + bit_28, 0b1, bit_8, bit_0, vecsize, pattern>; } -multiclass MVE_VRMLALDAVH_multi pattern=[]> { - defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#suffix, - 0b0, 0b0, 0b1, 0b0, pattern>; - defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#suffix, - 0b0, 0b1, 0b0, 0b1, 0b0, pattern>; +multiclass MVE_VRMLALDAVH_multi pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX<"vrmlaldavh", "s"#VTI.BitsSuffix, + 0b0, 0b0, 0b1, 0b0, VTI.Size, pattern>; + defm "" : MVE_VMLALDAVBase_A<"vrmlaldavh", "", "u"#VTI.BitsSuffix, + 0b0, 0b1, 0b0, 0b1, 0b0, VTI.Size, pattern>; } -defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi<"32">; +defm MVE_VRMLALDAVH : MVE_VRMLALDAVH_multi; // vrmlalvh aliases for vrmlaldavh def : MVEInstAlias<"vrmlalvh${vp}.s32\t$RdaLo, $RdaHi, $Qn, $Qm", @@ -1333,14 +1335,15 @@ tGPREven:$RdaLo, tGPROdd:$RdaHi, MQPR:$Qn, MQPR:$Qm, vpred_n:$vp)>; -multiclass MVE_VMLALDAV_multi pattern=[]> { - defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#suffix, sz, 0b0, 0b0, 0b0, pattern>; - defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#suffix, - sz, 0b1, 0b0, 0b0, 0b0, pattern>; +multiclass MVE_VMLALDAV_multi pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX<"vmlaldav", "s"#VTI.BitsSuffix, + VTI.Size{1}, 0b0, 0b0, 0b0, VTI.Size, pattern>; + defm "" : MVE_VMLALDAVBase_A<"vmlaldav", "", "u"#VTI.BitsSuffix, + VTI.Size{1}, 0b1, 0b0, 0b0, 0b0, VTI.Size, pattern>; } -defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"16", 0b0>; -defm MVE_VMLALDAV : MVE_VMLALDAV_multi<"32", 0b1>; +defm MVE_VMLALDAV : MVE_VMLALDAV_multi; +defm MVE_VMLALDAV : MVE_VMLALDAV_multi; let Predicates = [HasMVEInt] in { def : Pat<(ARMVMLALVs (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)), @@ -1393,22 +1396,22 @@ } multiclass MVE_VMLSLDAV_multi pattern=[]> { - defm "" : MVE_VMLALDAVBase_AX; + bit bit_28, bits<2> vecsize, list pattern=[]> { + defm "" : MVE_VMLALDAVBase_AX; } -defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0>; -defm 
MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0>; -defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1>; +defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s16", 0b0, 0b0, 0b01>; +defm MVE_VMLSLDAV : MVE_VMLSLDAV_multi<"vmlsldav", "s32", 0b1, 0b0, 0b10>; +defm MVE_VRMLSLDAVH : MVE_VMLSLDAV_multi<"vrmlsldavh", "s32", 0b0, 0b1, 0b10>; // end of mve_rDest instructions // start of mve_comp instructions class MVE_comp pattern=[]> + string cstr, bits<2> vecsize, list pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), itin, iname, suffix, - "$Qd, $Qn, $Qm", vpred_r, cstr, pattern> { + "$Qd, $Qn, $Qm", vpred_r, cstr, vecsize, pattern> { bits<4> Qd; bits<4> Qn; bits<4> Qm; @@ -1425,15 +1428,15 @@ let Inst{0} = 0b0; } -class MVE_VMINMAXNM sz, bit bit_21, list pattern=[]> - : MVE_comp { + : MVE_comp { let Inst{28} = 0b1; let Inst{25-24} = 0b11; let Inst{23} = 0b0; let Inst{21} = bit_21; - let Inst{20} = sz; + let Inst{20} = sz{0}; let Inst{11} = 0b1; let Inst{8} = 0b1; let Inst{6} = 0b1; @@ -1444,7 +1447,7 @@ } multiclass MVE_VMINMAXNM_m { - def "" : MVE_VMINMAXNM; + def "" : MVE_VMINMAXNM; let Predicates = [HasMVEFloat] in { defm : MVE_TwoOpPattern(NAME)>; @@ -1459,7 +1462,7 @@ class MVE_VMINMAX size, bit bit_4, list pattern=[]> - : MVE_comp { + : MVE_comp { let Inst{28} = U; let Inst{25-24} = 0b11; @@ -1505,8 +1508,8 @@ // start of mve_bit instructions class MVE_bit_arith pattern=[]> - : MVE_p { + string ops, string cstr, bits<2> vecsize, list pattern=[]> + : MVE_p { bits<4> Qd; bits<4> Qm; @@ -1517,7 +1520,7 @@ } def MVE_VBIC : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), - "vbic", "", "$Qd, $Qn, $Qm", ""> { + "vbic", "", "$Qd, $Qn, $Qm", "", 0b00> { bits<4> Qn; let Inst{28} = 0b0; @@ -1533,9 +1536,10 @@ let validForTailPredication = 1; } -class MVE_VREV size, bits<2> bit_8_7, string cstr=""> +class MVE_VREV size, bits<2> bit_8_7, + bits<2> vecsize, string cstr=""> : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), iname, - suffix, "$Qd, $Qm", cstr> { + suffix, "$Qd, $Qm", cstr, vecsize> { let Inst{28} = 0b1; let Inst{25-23} = 0b111; @@ -1549,14 +1553,14 @@ let Inst{0} = 0b0; } -def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, "@earlyclobber $Qd">; -def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, "@earlyclobber $Qd">; -def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, "@earlyclobber $Qd">; +def MVE_VREV64_8 : MVE_VREV<"vrev64", "8", 0b00, 0b00, 0b11, "@earlyclobber $Qd">; +def MVE_VREV64_16 : MVE_VREV<"vrev64", "16", 0b01, 0b00, 0b11, "@earlyclobber $Qd">; +def MVE_VREV64_32 : MVE_VREV<"vrev64", "32", 0b10, 0b00, 0b11, "@earlyclobber $Qd">; -def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01>; -def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01>; +def MVE_VREV32_8 : MVE_VREV<"vrev32", "8", 0b00, 0b01, 0b10>; +def MVE_VREV32_16 : MVE_VREV<"vrev32", "16", 0b01, 0b01, 0b10>; -def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10>; +def MVE_VREV16_8 : MVE_VREV<"vrev16", "8", 0b00, 0b10, 0b01>; let Predicates = [HasMVEInt] in { def : Pat<(v8i16 (bswap (v8i16 MQPR:$src))), @@ -1591,7 +1595,7 @@ } def MVE_VMVN : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qm), - "vmvn", "", "$Qd, $Qm", ""> { + "vmvn", "", "$Qd, $Qm", "", 0b00> { let Inst{28} = 0b1; let Inst{25-23} = 0b111; let Inst{21-16} = 0b110000; @@ -1614,7 +1618,7 @@ class MVE_bit_ops bit_21_20, bit bit_28> : MVE_bit_arith<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), - iname, "", "$Qd, $Qn, $Qm", ""> { + iname, "", "$Qd, $Qn, $Qm", "", 0b00> { bits<4> Qn; let Inst{28} 
= bit_28; @@ -1685,9 +1689,9 @@ int_arm_mve_orn_predicated, (? ), MVE_VORN>; } -class MVE_bit_cmode +class MVE_bit_cmode vecsize> : MVE_p<(outs MQPR:$Qd), inOps, NoItinerary, - iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src"> { + iname, suffix, "$Qd, $imm", vpred_n, "$Qd = $Qd_src", vecsize> { bits<12> imm; bits<4> Qd; @@ -1710,7 +1714,7 @@ multiclass MVE_bit_cmode_p { def "" : MVE_bit_cmode { + (ins MQPR:$Qd_src, imm_type:$imm), VTI.Size> { let Inst{5} = opcode; let validForTailPredication = 1; } @@ -1802,6 +1806,7 @@ let Inst{16} = Idx{1}; let Inst{21} = Idx{0}; + let VecSize = 0b10; let Predicates = [HasFPRegsV8_1M]; } @@ -1813,6 +1818,8 @@ let Inst{16} = Idx{2}; let Inst{21} = Idx{1}; let Inst{6} = Idx{0}; + + let VecSize = 0b01; } class MVE_VMOV_lane_8 @@ -1823,6 +1830,8 @@ let Inst{21} = Idx{2}; let Inst{6} = Idx{1}; let Inst{5} = Idx{0}; + + let VecSize = 0b00; } def MVE_VMOV_from_lane_32 : MVE_VMOV_lane_32< MVE_VMOV_from_lane>; @@ -1933,7 +1942,7 @@ class MVE_int size, list pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary, - iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", pattern> { + iname, suffix, "$Qd, $Qn, $Qm", vpred_r, "", size, pattern> { bits<4> Qd; bits<4> Qn; bits<4> Qm; @@ -2351,9 +2360,9 @@ defm MVE_VHSUBu16 : MVE_VHSUB; defm MVE_VHSUBu32 : MVE_VHSUB; -class MVE_VDUP pattern=[]> +class MVE_VDUP vecsize, list pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins rGPR:$Rt), NoItinerary, - "vdup", suffix, "$Qd, $Rt", vpred_r, "", pattern> { + "vdup", suffix, "$Qd, $Rt", vpred_r, "", vecsize, pattern> { bits<4> Qd; bits<4> Rt; @@ -2372,9 +2381,9 @@ let validForTailPredication = 1; } -def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>; -def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1>; -def MVE_VDUP8 : MVE_VDUP<"8", 0b1, 0b0>; +def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0, 0b10>; +def MVE_VDUP16 : MVE_VDUP<"16", 0b0, 0b1, 0b01>; +def MVE_VDUP8 : MVE_VDUP<"8", 0b1, 0b0, 0b00>; let Predicates = [HasMVEInt] in { def : Pat<(v16i8 (ARMvdup (i32 rGPR:$elem))), @@ -2421,7 +2430,7 @@ class MVEIntSingleSrc size, list pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm), NoItinerary, - iname, suffix, "$Qd, $Qm", vpred_r, "", pattern> { + iname, suffix, "$Qd, $Qm", vpred_r, "", size, pattern> { bits<4> Qd; bits<4> Qm; @@ -2566,9 +2575,9 @@ MVE_VQABSs32, MVE_VQNEGs32>; class MVE_mod_imm cmode, bit op, - dag iops, list pattern=[]> + dag iops, bits<2> vecsize, list pattern=[]> : MVE_p<(outs MQPR:$Qd), iops, NoItinerary, iname, suffix, "$Qd, $imm", - vpred_r, "", pattern> { + vpred_r, "", vecsize, pattern> { bits<13> imm; bits<4> Qd; @@ -2591,21 +2600,21 @@ let isReMaterializable = 1 in { let isAsCheapAsAMove = 1 in { -def MVE_VMOVimmi8 : MVE_mod_imm<"vmov", "i8", {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm)>; -def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm)> { +def MVE_VMOVimmi8 : MVE_mod_imm<"vmov", "i8", {1,1,1,0}, 0b0, (ins nImmSplatI8:$imm), 0b00>; +def MVE_VMOVimmi16 : MVE_mod_imm<"vmov", "i16", {1,0,?,0}, 0b0, (ins nImmSplatI16:$imm), 0b01> { let Inst{9} = imm{9}; } -def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm)> { +def MVE_VMOVimmi32 : MVE_mod_imm<"vmov", "i32", {?,?,?,?}, 0b0, (ins nImmVMOVI32:$imm), 0b10> { let Inst{11-8} = imm{11-8}; } -def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm)>; -def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm)>; +def MVE_VMOVimmi64 : MVE_mod_imm<"vmov", "i64", {1,1,1,0}, 0b1, (ins nImmSplatI64:$imm), 0b11>; 
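+// vmov.i64 writes a whole 64-bit lane, so it is tagged with VecSize 0b11 (i64),
+// like the vrev64 instructions.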
+def MVE_VMOVimmf32 : MVE_mod_imm<"vmov", "f32", {1,1,1,1}, 0b0, (ins nImmVMOVF32:$imm), 0b10>; } // let isAsCheapAsAMove = 1 -def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm)> { +def MVE_VMVNimmi16 : MVE_mod_imm<"vmvn", "i16", {1,0,?,0}, 0b1, (ins nImmSplatI16:$imm), 0b01> { let Inst{9} = imm{9}; } -def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm)> { +def MVE_VMVNimmi32 : MVE_mod_imm<"vmvn", "i32", {?,?,?,?}, 0b1, (ins nImmVMOVI32:$imm), 0b10> { let Inst{11-8} = imm{11-8}; } } // let isReMaterializable = 1 @@ -2642,7 +2651,7 @@ bit bit_12, list pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm), NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src", - pattern> { + size, pattern> { bits<4> Qd; bits<4> Qm; @@ -2701,7 +2710,7 @@ def MVE_VSHLC : MVE_p<(outs rGPR:$RdmDest, MQPR:$Qd), (ins MQPR:$QdSrc, rGPR:$RdmSrc, long_shift:$imm), NoItinerary, "vshlc", "", "$QdSrc, $RdmSrc, $imm", - vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc"> { + vpred_n, "$RdmDest = $RdmSrc,$Qd = $QdSrc", 0b10> { bits<5> imm; bits<4> Qd; bits<4> RdmDest; @@ -2718,8 +2727,8 @@ class MVE_shift_imm pattern=[]> - : MVE_p { + bits<2> vecsize, list pattern=[]> + : MVE_p { bits<4> Qd; bits<4> Qm; @@ -2733,7 +2742,7 @@ list pattern=[]> : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm), iname, suffix, "$Qd, $Qm", vpred_r, "", - pattern> { + sz, pattern> { let Inst{28} = U; let Inst{25-23} = 0b101; let Inst{21} = 0b1; @@ -2799,9 +2808,9 @@ class MVE_VSHLL_imm pattern=[]> + Operand immtype, bits<2> vecsize, list pattern=[]> : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm, immtype:$imm), - iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", pattern> { + iname, suffix, "$Qd, $Qm, $imm", vpred_r, "", vecsize, pattern> { let Inst{28} = U; let Inst{25-23} = 0b101; let Inst{21} = 0b1; @@ -2822,7 +2831,7 @@ class MVE_VSHLL_imm8 pattern=[]> - : MVE_VSHLL_imm { + : MVE_VSHLL_imm { bits<3> imm; let Inst{20-19} = 0b01; let Inst{18-16} = imm; @@ -2830,7 +2839,7 @@ class MVE_VSHLL_imm16 pattern=[]> - : MVE_VSHLL_imm { + : MVE_VSHLL_imm { bits<4> imm; let Inst{20} = 0b1; let Inst{19-16} = imm; @@ -2848,7 +2857,7 @@ class MVE_VSHLL_by_lane_width size, bit U, string ops, list pattern=[]> : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$Qm), - iname, suffix, ops, vpred_r, "", pattern> { + iname, suffix, ops, vpred_r, "", !if(size, 0b10, 0b01), pattern> { let Inst{28} = U; let Inst{25-23} = 0b100; let Inst{21-20} = 0b11; @@ -2910,15 +2919,15 @@ foreach top = [0, 1] in defm : MVE_VSHLL_patterns; -class MVE_shift_imm_partial +class MVE_shift_imm_partial vecsize> : MVE_shift_imm<(outs MQPR:$Qd), (ins MQPR:$QdSrc, MQPR:$Qm, imm:$imm), - iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc"> { + iname, suffix, "$Qd, $Qm, $imm", vpred_n, "$Qd = $QdSrc", vecsize> { Operand immediateType = imm; } class MVE_VxSHRN - : MVE_shift_imm_partial { + Operand imm, bits<2> vecsize> + : MVE_shift_imm_partial { bits<5> imm; let Inst{28} = bit_28; @@ -2933,35 +2942,35 @@ let retainsPreviousHalfElement = 1; } -def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> { +def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } -def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8> { +def MVE_VRSHRNi16th : MVE_VxSHRN<"vrshrnt", "i16", 0b1, 0b1, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } -def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", "i32", 0b0, 0b1, shr_imm16> { +def MVE_VRSHRNi32bh : MVE_VxSHRN<"vrshrnb", 
"i32", 0b0, 0b1, shr_imm16, 0b10> { let Inst{20} = 0b1; } -def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16> { +def MVE_VRSHRNi32th : MVE_VxSHRN<"vrshrnt", "i32", 0b1, 0b1, shr_imm16, 0b10> { let Inst{20} = 0b1; } -def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8> { +def MVE_VSHRNi16bh : MVE_VxSHRN<"vshrnb", "i16", 0b0, 0b0, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } -def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8> { +def MVE_VSHRNi16th : MVE_VxSHRN<"vshrnt", "i16", 0b1, 0b0, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } -def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16> { +def MVE_VSHRNi32bh : MVE_VxSHRN<"vshrnb", "i32", 0b0, 0b0, shr_imm16, 0b10> { let Inst{20} = 0b1; } -def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16> { +def MVE_VSHRNi32th : MVE_VxSHRN<"vshrnt", "i32", 0b1, 0b0, shr_imm16, 0b10> { let Inst{20} = 0b1; } class MVE_VxQRSHRUN - : MVE_shift_imm_partial { + Operand imm, bits<2> vecsize> + : MVE_shift_imm_partial { bits<5> imm; let Inst{28} = bit_28; @@ -2977,42 +2986,42 @@ } def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN< - "vqrshrunb", "s16", 0b1, 0b0, shr_imm8> { + "vqrshrunb", "s16", 0b1, 0b0, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } def MVE_VQRSHRUNs16th : MVE_VxQRSHRUN< - "vqrshrunt", "s16", 0b1, 0b1, shr_imm8> { + "vqrshrunt", "s16", 0b1, 0b1, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } def MVE_VQRSHRUNs32bh : MVE_VxQRSHRUN< - "vqrshrunb", "s32", 0b1, 0b0, shr_imm16> { + "vqrshrunb", "s32", 0b1, 0b0, shr_imm16, 0b10> { let Inst{20} = 0b1; } def MVE_VQRSHRUNs32th : MVE_VxQRSHRUN< - "vqrshrunt", "s32", 0b1, 0b1, shr_imm16> { + "vqrshrunt", "s32", 0b1, 0b1, shr_imm16, 0b10> { let Inst{20} = 0b1; } def MVE_VQSHRUNs16bh : MVE_VxQRSHRUN< - "vqshrunb", "s16", 0b0, 0b0, shr_imm8> { + "vqshrunb", "s16", 0b0, 0b0, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } def MVE_VQSHRUNs16th : MVE_VxQRSHRUN< - "vqshrunt", "s16", 0b0, 0b1, shr_imm8> { + "vqshrunt", "s16", 0b0, 0b1, shr_imm8, 0b01> { let Inst{20-19} = 0b01; } def MVE_VQSHRUNs32bh : MVE_VxQRSHRUN< - "vqshrunb", "s32", 0b0, 0b0, shr_imm16> { + "vqshrunb", "s32", 0b0, 0b0, shr_imm16, 0b10> { let Inst{20} = 0b1; } def MVE_VQSHRUNs32th : MVE_VxQRSHRUN< - "vqshrunt", "s32", 0b0, 0b1, shr_imm16> { + "vqshrunt", "s32", 0b0, 0b1, shr_imm16, 0b10> { let Inst{20} = 0b1; } class MVE_VxQRSHRN - : MVE_shift_imm_partial { + Operand imm, bits<2> vecsize> + : MVE_shift_imm_partial { bits<5> imm; let Inst{25-23} = 0b101; @@ -3027,19 +3036,19 @@ } multiclass MVE_VxQRSHRN_types { - def s16 : MVE_VxQRSHRN { + def s16 : MVE_VxQRSHRN { let Inst{28} = 0b0; let Inst{20-19} = 0b01; } - def u16 : MVE_VxQRSHRN { + def u16 : MVE_VxQRSHRN { let Inst{28} = 0b1; let Inst{20-19} = 0b01; } - def s32 : MVE_VxQRSHRN { + def s32 : MVE_VxQRSHRN { let Inst{28} = 0b0; let Inst{20} = 0b1; } - def u32 : MVE_VxQRSHRN { + def u32 : MVE_VxQRSHRN { let Inst{28} = 0b1; let Inst{20} = 0b1; } @@ -3114,7 +3123,7 @@ class MVE_shift_by_vec size, bit bit_4, bit bit_8> : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qm, MQPR:$Qn), NoItinerary, - iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", []> { + iname, suffix, "$Qd, $Qm, $Qn", vpred_r, "", size, []> { // Shift instructions which take a vector of shift counts bits<4> Qd; bits<4> Qm; @@ -3189,8 +3198,8 @@ class MVE_shift_with_imm pattern=[]> - : MVE_p { + bits<2> vecsize, list pattern=[]> + : MVE_p { bits<4> Qd; bits<4> Qm; @@ -3213,10 +3222,10 @@ dag unsignedFlag = (?); } -class MVE_VSxI_imm +class MVE_VSxI_imm vecsize> : MVE_shift_with_imm { + 
"$Qd, $Qm, $imm", vpred_n, "$Qd = $Qd_src", vecsize> { bits<6> imm; let Inst{28} = 0b1; let Inst{25-24} = 0b11; @@ -3228,27 +3237,27 @@ Operand immediateType = immType; } -def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8> { +def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, shr_imm8, 0b00> { let Inst{21-19} = 0b001; } -def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16> { +def MVE_VSRIimm16 : MVE_VSxI_imm<"vsri", "16", 0b0, shr_imm16, 0b01> { let Inst{21-20} = 0b01; } -def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32> { +def MVE_VSRIimm32 : MVE_VSxI_imm<"vsri", "32", 0b0, shr_imm32, 0b10> { let Inst{21} = 0b1; } -def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7> { +def MVE_VSLIimm8 : MVE_VSxI_imm<"vsli", "8", 0b1, imm0_7, 0b00> { let Inst{21-19} = 0b001; } -def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15> { +def MVE_VSLIimm16 : MVE_VSxI_imm<"vsli", "16", 0b1, imm0_15, 0b01> { let Inst{21-20} = 0b01; } -def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,imm0_31> { +def MVE_VSLIimm32 : MVE_VSxI_imm<"vsli", "32", 0b1,imm0_31, 0b10> { let Inst{21} = 0b1; } @@ -3277,7 +3286,7 @@ class MVE_VQSHL_imm : MVE_shift_with_imm<"vqshl", VTI_.Suffix, (outs MQPR:$Qd), (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm", - vpred_r, ""> { + vpred_r, "", VTI_.Size> { bits<6> imm; let Inst{28} = VTI_.Unsigned; @@ -3317,7 +3326,7 @@ class MVE_VQSHLU_imm : MVE_shift_with_imm<"vqshlu", VTI_.Suffix, (outs MQPR:$Qd), (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm", - vpred_r, ""> { + vpred_r, "", VTI_.Size> { bits<6> imm; let Inst{28} = 0b1; @@ -3347,7 +3356,7 @@ class MVE_VRSHR_imm : MVE_shift_with_imm<"vrshr", VTI_.Suffix, (outs MQPR:$Qd), (ins MQPR:$Qm, immType:$imm), "$Qd, $Qm, $imm", - vpred_r, ""> { + vpred_r, "", VTI_.Size> { bits<6> imm; let Inst{28} = VTI_.Unsigned; @@ -3421,10 +3430,10 @@ defm : MVE_shift_imm_patterns; defm : MVE_shift_imm_patterns; -class MVE_VSHR_imm +class MVE_VSHR_imm vecsize> : MVE_shift_with_imm<"vshr", suffix, (outs MQPR:$Qd), !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm", - vpred_r, ""> { + vpred_r, "", vecsize> { bits<6> imm; let Inst{25-24} = 0b11; @@ -3432,40 +3441,40 @@ let Inst{10-8} = 0b000; } -def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm)> { +def MVE_VSHR_imms8 : MVE_VSHR_imm<"s8", (ins shr_imm8:$imm), 0b00> { let Inst{28} = 0b0; let Inst{21-19} = 0b001; } -def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm)> { +def MVE_VSHR_immu8 : MVE_VSHR_imm<"u8", (ins shr_imm8:$imm), 0b00> { let Inst{28} = 0b1; let Inst{21-19} = 0b001; } -def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm)> { +def MVE_VSHR_imms16 : MVE_VSHR_imm<"s16", (ins shr_imm16:$imm), 0b01> { let Inst{28} = 0b0; let Inst{21-20} = 0b01; } -def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm)> { +def MVE_VSHR_immu16 : MVE_VSHR_imm<"u16", (ins shr_imm16:$imm), 0b01> { let Inst{28} = 0b1; let Inst{21-20} = 0b01; } -def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm)> { +def MVE_VSHR_imms32 : MVE_VSHR_imm<"s32", (ins shr_imm32:$imm), 0b10> { let Inst{28} = 0b0; let Inst{21} = 0b1; } -def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm)> { +def MVE_VSHR_immu32 : MVE_VSHR_imm<"u32", (ins shr_imm32:$imm), 0b10> { let Inst{28} = 0b1; let Inst{21} = 0b1; } -class MVE_VSHL_imm +class MVE_VSHL_imm vecsize> : MVE_shift_with_imm<"vshl", suffix, (outs MQPR:$Qd), !con((ins MQPR:$Qm), imm), "$Qd, $Qm, $imm", - vpred_r, ""> { + vpred_r, "", vecsize> { bits<6> imm; let Inst{28} = 0b0; @@ -3474,15 +3483,15 @@ let Inst{10-8} 
= 0b101; } -def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm)> { +def MVE_VSHL_immi8 : MVE_VSHL_imm<"i8", (ins imm0_7:$imm), 0b00> { let Inst{21-19} = 0b001; } -def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm)> { +def MVE_VSHL_immi16 : MVE_VSHL_imm<"i16", (ins imm0_15:$imm), 0b01> { let Inst{21-20} = 0b01; } -def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm)> { +def MVE_VSHL_immi32 : MVE_VSHL_imm<"i32", (ins imm0_31:$imm), 0b10> { let Inst{21} = 0b1; } @@ -3526,8 +3535,8 @@ // start of MVE Floating Point instructions class MVE_float pattern=[]> - : MVE_f { + vpred_ops vpred, string cstr, bits<2> vecsize, list pattern=[]> + : MVE_f { bits<4> Qm; let Inst{12} = 0b0; @@ -3540,7 +3549,7 @@ class MVE_VRINT op, string suffix, bits<2> size, list pattern=[]> : MVE_float { + (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> { bits<4> Qd; let Inst{28} = 0b1; @@ -3587,16 +3596,16 @@ class MVEFloatArithNeon pattern=[]> - : MVE_float { + vpred_ops vpred, string cstr, bits<2> vecsize, list pattern=[]> + : MVE_float { let Inst{20} = size; let Inst{16} = 0b0; } -class MVE_VMUL_fp pattern=[]> - : MVEFloatArithNeon size, list pattern=[]> + : MVEFloatArithNeon { + size, pattern> { bits<4> Qd; bits<4> Qn; @@ -3614,7 +3623,7 @@ multiclass MVE_VMULT_fp_m { - def "" : MVE_VMUL_fp; + def "" : MVE_VMUL_fp; defvar Inst = !cast(NAME); let Predicates = [HasMVEFloat] in { @@ -3628,10 +3637,10 @@ defm MVE_VMULf32 : MVE_VMUL_fp_m; defm MVE_VMULf16 : MVE_VMUL_fp_m; -class MVE_VCMLA - : MVEFloatArithNeon<"vcmla", suffix, size, (outs MQPR:$Qd), +class MVE_VCMLA size> + : MVEFloatArithNeon<"vcmla", suffix, size{1}, (outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qn, MQPR:$Qm, complexrotateop:$rot), - "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", []> { + "$Qd, $Qn, $Qm, $rot", vpred_n, "$Qd = $Qd_src", size, []> { bits<4> Qd; bits<4> Qn; bits<2> rot; @@ -3648,8 +3657,8 @@ let Inst{4} = 0b0; } -multiclass MVE_VCMLA_m { - def "" : MVE_VCMLA; +multiclass MVE_VCMLA_m { + def "" : MVE_VCMLA; defvar Inst = !cast(NAME); let Predicates = [HasMVEFloat] in { @@ -3671,16 +3680,16 @@ } } -defm MVE_VCMLAf16 : MVE_VCMLA_m; -defm MVE_VCMLAf32 : MVE_VCMLA_m; +defm MVE_VCMLAf16 : MVE_VCMLA_m; +defm MVE_VCMLAf32 : MVE_VCMLA_m; -class MVE_VADDSUBFMA_fp size, bit bit_4, bit bit_8, bit bit_21, dag iops=(ins), vpred_ops vpred=vpred_r, string cstr="", list pattern=[]> - : MVEFloatArithNeon { + vpred, cstr, size, pattern> { bits<4> Qd; bits<4> Qn; @@ -3698,7 +3707,7 @@ } multiclass MVE_VFMA_fp_multi { - def "" : MVE_VADDSUBFMA_fp; defvar Inst = !cast(NAME); defvar pred_int = int_arm_mve_fma_predicated; @@ -3739,7 +3748,7 @@ multiclass MVE_VADDSUB_fp_m { - def "" : MVE_VADDSUBFMA_fp { + def "" : MVE_VADDSUBFMA_fp { let validForTailPredication = 1; } defvar Inst = !cast(NAME); @@ -3760,10 +3769,10 @@ defm MVE_VSUBf32 : MVE_VSUB_fp_m; defm MVE_VSUBf16 : MVE_VSUB_fp_m; -class MVE_VCADD - : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd), +class MVE_VCADD size, string cstr=""> + : MVEFloatArithNeon<"vcadd", suffix, size{1}, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot), - "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, []> { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, size, []> { bits<4> Qd; bits<4> Qn; bit rot; @@ -3781,8 +3790,8 @@ let Inst{4} = 0b0; } -multiclass MVE_VCADD_m { - def "" : MVE_VCADD; +multiclass MVE_VCADD_m { + def "" : MVE_VCADD; defvar Inst = !cast(NAME); let Predicates = [HasMVEFloat] in { @@ -3802,12 +3811,12 @@ } } -defm MVE_VCADDf16 : MVE_VCADD_m; -defm MVE_VCADDf32 : MVE_VCADD_m; 
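+// Note: the float classes now receive the full two-bit size so that VecSize is
+// exact, even where the encoding itself only needs a single bit to distinguish
+// f16 from f32.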
+defm MVE_VCADDf16 : MVE_VCADD_m; +defm MVE_VCADDf32 : MVE_VCADD_m; -class MVE_VABD_fp +class MVE_VABD_fp size> : MVE_float<"vabd", suffix, (outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), - "$Qd, $Qn, $Qm", vpred_r, ""> { + "$Qd, $Qn, $Qm", vpred_r, "", size> { bits<4> Qd; bits<4> Qn; @@ -3815,7 +3824,7 @@ let Inst{25-23} = 0b110; let Inst{22} = Qd{3}; let Inst{21} = 0b1; - let Inst{20} = size; + let Inst{20} = size{0}; let Inst{19-17} = Qn{2-0}; let Inst{16} = 0b0; let Inst{15-13} = Qd{2-0}; @@ -3827,7 +3836,7 @@ multiclass MVE_VABDT_fp_m { - def "" : MVE_VABD_fp; + def "" : MVE_VABD_fp; defvar Inst = !cast(NAME); let Predicates = [HasMVEFloat] in { @@ -3847,7 +3856,7 @@ : MVE_VABDT_fp_m; defm MVE_VABDf32 : MVE_VABD_fp_m; -defm MVE_VABDf16 : MVE_VABD_fp_m; +defm MVE_VABDf16 : MVE_VABD_fp_m; let Predicates = [HasMVEFloat] in { def : Pat<(v8f16 (fabs (fsub (v8f16 MQPR:$Qm), (v8f16 MQPR:$Qn)))), @@ -3860,7 +3869,7 @@ Operand imm_operand_type> : MVE_float<"vcvt", suffix, (outs MQPR:$Qd), (ins MQPR:$Qm, imm_operand_type:$imm6), - "$Qd, $Qm, $imm6", vpred_r, "", []> { + "$Qd, $Qm, $imm6", vpred_r, "", !if(fsi, 0b10, 0b01), []> { bits<4> Qd; bits<6> imm6; @@ -3943,7 +3952,7 @@ class MVE_VCVT_fp_int_anpm size, bit op, string anpm, bits<2> rm, list pattern=[]> : MVE_float { + (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> { bits<4> Qd; let Inst{28} = 0b1; @@ -4000,7 +4009,7 @@ class MVE_VCVT_fp_int size, bit toint, bit unsigned, list pattern=[]> : MVE_float<"vcvt", suffix, (outs MQPR:$Qd), - (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", pattern> { + (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> { bits<4> Qd; let Inst{28} = 0b1; @@ -4063,7 +4072,7 @@ class MVE_VABSNEG_fp size, bit negate, list pattern=[]> : MVE_float { + (ins MQPR:$Qm), "$Qd, $Qm", vpred_r, "", size, pattern> { bits<4> Qd; let Inst{28} = 0b1; @@ -4102,15 +4111,15 @@ defm MVE_VNEGf32 : MVE_VABSNEG_fp_m<"vneg", fneg, int_arm_mve_neg_predicated, MVE_v4f32, 1>; -class MVE_VMAXMINNMA size, bit bit_12, list pattern=[]> : MVE_f<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qm), NoItinerary, iname, suffix, "$Qd, $Qm", vpred_n, "$Qd = $Qd_src", - pattern> { + size, pattern> { bits<4> Qd; bits<4> Qm; - let Inst{28} = size; + let Inst{28} = size{0}; let Inst{25-23} = 0b100; let Inst{22} = Qd{3}; let Inst{21-16} = 0b111111; @@ -4129,7 +4138,7 @@ multiclass MVE_VMAXMINNMA_m { - def "" : MVE_VMAXMINNMA; + def "" : MVE_VMAXMINNMA; defvar Inst = !cast(NAME); let Predicates = [HasMVEInt] in { @@ -4163,9 +4172,9 @@ // start of MVE compares class MVE_VCMPqq bits_21_20, - VCMPPredicateOperand predtype, list pattern=[]> + VCMPPredicateOperand predtype, bits<2> vecsize, list pattern=[]> : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, MQPR:$Qm, predtype:$fc), - NoItinerary, "vcmp", suffix, "$fc, $Qn, $Qm", vpred_n, "", pattern> { + NoItinerary, "vcmp", suffix, "$fc, $Qn, $Qm", vpred_n, "", vecsize, pattern> { // Base class for comparing two vector registers bits<3> fc; bits<4> Qn; @@ -4200,24 +4209,24 @@ } class MVE_VCMPqqf - : MVE_VCMPqq { + : MVE_VCMPqq { let Predicates = [HasMVEFloat]; } class MVE_VCMPqqi size> - : MVE_VCMPqq { + : MVE_VCMPqq { let Inst{12} = 0b0; let Inst{0} = 0b0; } class MVE_VCMPqqu size> - : MVE_VCMPqq { + : MVE_VCMPqq { let Inst{12} = 0b0; let Inst{0} = 0b1; } class MVE_VCMPqqs size> - : MVE_VCMPqq { + : MVE_VCMPqq { let Inst{12} = 0b1; } @@ -4237,9 +4246,9 @@ def MVE_VCMPs32 : MVE_VCMPqqs<"s32", 0b10>; class MVE_VCMPqr bits_21_20, - VCMPPredicateOperand predtype, list pattern=[]> + VCMPPredicateOperand predtype, bits<2> vecsize, 
list pattern=[]> : MVE_p<(outs VCCR:$P0), (ins MQPR:$Qn, GPRwithZR:$Rm, predtype:$fc), - NoItinerary, "vcmp", suffix, "$fc, $Qn, $Rm", vpred_n, "", pattern> { + NoItinerary, "vcmp", suffix, "$fc, $Qn, $Rm", vpred_n, "", vecsize, pattern> { // Base class for comparing a vector register with a scalar bits<3> fc; bits<4> Qn; @@ -4265,24 +4274,24 @@ } class MVE_VCMPqrf - : MVE_VCMPqr { + : MVE_VCMPqr { let Predicates = [HasMVEFloat]; } class MVE_VCMPqri size> - : MVE_VCMPqr { + : MVE_VCMPqr { let Inst{12} = 0b0; let Inst{5} = 0b0; } class MVE_VCMPqru size> - : MVE_VCMPqr { + : MVE_VCMPqr { let Inst{12} = 0b0; let Inst{5} = 0b1; } class MVE_VCMPqrs size> - : MVE_VCMPqr { + : MVE_VCMPqr { let Inst{12} = 0b1; } @@ -4490,9 +4499,9 @@ class MVE_qDest_qSrc pattern=[]> + bits<2> vecsize, list pattern=[]> : MVE_p { + ops, vpred, cstr, vecsize, pattern> { bits<4> Qd; bits<4> Qm; @@ -4507,10 +4516,11 @@ } class MVE_VQxDMLxDH size, string cstr="", list pattern=[]> + string suffix, bits<2> size, string cstr="", + list pattern=[]> : MVE_qDest_qSrc { + vpred_n, "$Qd = $Qd_src"#cstr, size, pattern> { bits<4> Qn; let Inst{28} = subtract; @@ -4560,14 +4570,15 @@ defm MVE_VQRDMLSDH : MVE_VQxDMLxDH_multi<"vqrdmlsdh", 0b0, 0b1, 0b1>; defm MVE_VQRDMLSDHX : MVE_VQxDMLxDH_multi<"vqrdmlsdhx", 0b1, 0b1, 0b1>; -class MVE_VCMUL +class MVE_VCMUL size, string cstr=""> : MVE_qDest_qSrc { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, size, + []> { bits<4> Qn; bits<2> rot; - let Inst{28} = size; + let Inst{28} = size{1}; let Inst{21-20} = 0b11; let Inst{19-17} = Qn{2-0}; let Inst{16} = 0b0; @@ -4580,8 +4591,8 @@ } multiclass MVE_VCMUL_m { - def "" : MVE_VCMUL; + string cstr=""> { + def "" : MVE_VCMUL; defvar Inst = !cast(NAME); let Predicates = [HasMVEFloat] in { @@ -4601,14 +4612,14 @@ } } -defm MVE_VCMULf16 : MVE_VCMUL_m<"vcmul", MVE_v8f16, 0b0>; -defm MVE_VCMULf32 : MVE_VCMUL_m<"vcmul", MVE_v4f32, 0b1, "@earlyclobber $Qd">; +defm MVE_VCMULf16 : MVE_VCMUL_m<"vcmul", MVE_v8f16>; +defm MVE_VCMULf32 : MVE_VCMUL_m<"vcmul", MVE_v4f32, "@earlyclobber $Qd">; class MVE_VMULL bits_21_20, - bit T, string cstr, list pattern=[]> + bit T, string cstr, bits<2> vecsize, list pattern=[]> : MVE_qDest_qSrc { + vpred_r, cstr, vecsize, pattern> { bits<4> Qd; bits<4> Qn; bits<4> Qm; @@ -4627,9 +4638,9 @@ multiclass MVE_VMULL_m { + bit Top, bits<2> vecsize, string cstr=""> { def "" : MVE_VMULL<"vmull" # !if(Top, "t", "b"), VTI.Suffix, VTI.Unsigned, - VTI.Size, Top, cstr>; + VTI.Size, Top, cstr, vecsize>; defvar Inst = !cast(NAME); let Predicates = [HasMVEInt] in { @@ -4656,43 +4667,43 @@ // the unsigned bit switches to encoding the size. 
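+// Each vmull is tagged with the VecSize of its widened result: the 8-bit
+// variants produce i16 lanes (0b01) and the 16-bit variants i32 lanes (0b10).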
defm MVE_VMULLBs8 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b0, 0b01>; defm MVE_VMULLTs8 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b1, 0b01>; defm MVE_VMULLBs16 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b0, 0b10>; defm MVE_VMULLTs16 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b1, 0b10>; defm MVE_VMULLBs32 : MVE_VMULL_m; defm MVE_VMULLTs32 : MVE_VMULL_m; defm MVE_VMULLBu8 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b0, 0b01>; defm MVE_VMULLTu8 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b1, 0b01>; defm MVE_VMULLBu16 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b0, 0b10>; defm MVE_VMULLTu16 : MVE_VMULL_m; + int_arm_mve_mull_int_predicated, 0b1, 0b10>; defm MVE_VMULLBu32 : MVE_VMULL_m; defm MVE_VMULLTu32 : MVE_VMULL_m; defm MVE_VMULLBp8 : MVE_VMULL_m; + int_arm_mve_mull_poly_predicated, 0b0, 0b01>; defm MVE_VMULLTp8 : MVE_VMULL_m; + int_arm_mve_mull_poly_predicated, 0b1, 0b01>; defm MVE_VMULLBp16 : MVE_VMULL_m; + int_arm_mve_mull_poly_predicated, 0b0, 0b10>; defm MVE_VMULLTp16 : MVE_VMULL_m; + int_arm_mve_mull_poly_predicated, 0b1, 0b10>; let Predicates = [HasMVEInt] in { def : Pat<(v2i64 (ARMvmulls (v4i32 MQPR:$src1), (v4i32 MQPR:$src2))), @@ -4742,7 +4753,7 @@ list pattern=[]> : MVE_qDest_qSrc { + vpred_r, "", size, pattern> { bits<4> Qn; let Inst{28} = U; @@ -4807,7 +4818,7 @@ bits<2> size, bit T, list pattern=[]> : MVE_qDest_qSrc { + vpred_n, "$Qd = $Qd_src", !if(size, 0b10, 0b01), pattern> { let Inst{28} = bit_28; let Inst{21-20} = 0b11; @@ -4952,7 +4963,7 @@ dag iops_extra, vpred_ops vpred, string cstr> : MVE_qDest_qSrc { + vpred, cstr, 0b10, []> { let Inst{28} = op; let Inst{21-16} = 0b111111; let Inst{12} = T; @@ -5015,7 +5026,7 @@ string cstr=""> : MVE_qDest_qSrc { + "$Qd, $Qn, $Qm, $rot", vpred_r, cstr, size, []> { bits<4> Qn; bit rot; @@ -5063,7 +5074,7 @@ dag carryin, list pattern=[]> : MVE_qDest_qSrc { + "$Qd, $Qn, $Qm", vpred_r, "", 0b10, pattern> { bits<4> Qn; let Inst{28} = subtract; @@ -5090,7 +5101,7 @@ string cstr="", list pattern=[]> : MVE_qDest_qSrc { + vpred_r, cstr, !if(size, 0b10, 0b01), pattern> { bits<4> Qn; let Inst{28} = size; @@ -5139,8 +5150,8 @@ // start of mve_qDest_rSrc class MVE_qr_base pattern=[]> - : MVE_p { + vpred_ops vpred, string cstr, bits<2> vecsize, list pattern=[]> + : MVE_p { bits<4> Qd; bits<4> Qn; bits<4> Rm; @@ -5156,19 +5167,19 @@ let Inst{3-0} = Rm{3-0}; } -class MVE_qDest_rSrc pattern=[]> +class MVE_qDest_rSrc vecsize, list pattern=[]> : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qn, rGPR:$Rm), - iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr, - pattern>; + iname, suffix, "$Qd, $Qn, $Rm", vpred_r, cstr, + vecsize, pattern>; -class MVE_qDestSrc_rSrc pattern=[]> +class MVE_qDestSrc_rSrc vecsize, list pattern=[]> : MVE_qr_base<(outs MQPR:$Qd), (ins MQPR:$Qd_src, MQPR:$Qn, rGPR:$Rm), - iname, suffix, "$Qd, $Qn, $Rm", vpred_n, "$Qd = $Qd_src", - pattern>; + iname, suffix, "$Qd, $Qn, $Rm", vpred_n, "$Qd = $Qd_src", + vecsize, pattern>; -class MVE_qDest_single_rSrc pattern=[]> +class MVE_qDest_single_rSrc vecsize, list pattern=[]> : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qd_src, rGPR:$Rm), NoItinerary, iname, - suffix, "$Qd, $Rm", vpred_n, "$Qd = $Qd_src", pattern> { + suffix, "$Qd, $Rm", vpred_n, "$Qd = $Qd_src", vecsize, pattern> { bits<4> Qd; bits<4> Rm; @@ -5206,7 +5217,7 @@ class MVE_VADDSUB_qr size, bit bit_5, bit bit_12, bit bit_16, bit bit_28> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = bit_28; let Inst{21-20} = size; @@ -5274,7 +5285,7 @@ class MVE_VQDMULL_qr pattern=[]> - : 
MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = size; let Inst{21-20} = 0b11; @@ -5319,12 +5330,12 @@ defm MVE_VQDMULL_qr_s32 : MVE_VQDMULL_qr_halves; class MVE_VxADDSUB_qr bits_21_20, bit subtract, - list pattern=[]> - : MVE_qDest_rSrc { + bit bit_28, bits<2> size, bit subtract, + bits<2> vecsize, list pattern=[]> + : MVE_qDest_rSrc { let Inst{28} = bit_28; - let Inst{21-20} = bits_21_20; + let Inst{21-20} = size; let Inst{16} = 0b0; let Inst{12} = subtract; let Inst{8} = 0b1; @@ -5334,7 +5345,7 @@ multiclass MVE_VHADDSUB_qr_m { - def "" : MVE_VxADDSUB_qr; + def "" : MVE_VxADDSUB_qr; defm : MVE_vec_scalar_int_pat_m(NAME), VTI, unpred_int, pred_int, 1, 1>; } @@ -5363,7 +5374,7 @@ multiclass MVE_VADDSUB_qr_f { - def "" : MVE_VxADDSUB_qr; + def "" : MVE_VxADDSUB_qr; defm : MVE_TwoOpPatternDup(NAME)>; } @@ -5382,7 +5393,7 @@ class MVE_VxSHL_qr size, bit bit_7, bit bit_17, list pattern=[]> - : MVE_qDest_single_rSrc { + : MVE_qDest_single_rSrc { let Inst{28} = U; let Inst{25-23} = 0b100; @@ -5444,7 +5455,7 @@ } class MVE_VBRSR size, list pattern=[]> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = 0b1; let Inst{21-20} = size; @@ -5494,7 +5505,7 @@ } class MVE_VMUL_qr_int size> - : MVE_qDest_rSrc { + : MVE_qDest_rSrc { let Inst{28} = 0b0; let Inst{21-20} = size; @@ -5518,11 +5529,11 @@ defm MVE_VMUL_qr_i32 : MVE_VMUL_qr_int_m; class MVE_VxxMUL_qr bits_21_20, list pattern=[]> - : MVE_qDest_rSrc { + bit bit_28, bits<2> size, bits<2> vecsize, list pattern=[]> + : MVE_qDest_rSrc { let Inst{28} = bit_28; - let Inst{21-20} = bits_21_20; + let Inst{21-20} = size; let Inst{16} = 0b1; let Inst{12} = 0b0; let Inst{8} = 0b0; @@ -5532,7 +5543,7 @@ multiclass MVE_VxxMUL_qr_m { - def "" : MVE_VxxMUL_qr; + def "" : MVE_VxxMUL_qr; let Predicates = [HasMVEInt] in { defm : MVE_TwoOpPatternDup(NAME)>; @@ -5558,7 +5569,7 @@ multiclass MVE_VxxMUL_qr_f_m { let validForTailPredication = 1 in - def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11>; + def "" : MVE_VxxMUL_qr<"vmul", VTI.Suffix, VTI.Size{0}, 0b11, VTI.Size>; defm : MVE_TwoOpPatternDup(NAME)>; } @@ -5570,8 +5581,8 @@ class MVE_VFMAMLA_qr bits_21_20, bit S, - list pattern=[]> - : MVE_qDestSrc_rSrc { + bits<2> vecsize, list pattern=[]> + : MVE_qDestSrc_rSrc { let Inst{28} = bit_28; let Inst{21-20} = bits_21_20; @@ -5586,7 +5597,7 @@ multiclass MVE_VMLA_qr_multi { def "": MVE_VFMAMLA_qr; + scalar_addend, VTI.Size>; defvar Inst = !cast(NAME); defvar pred_int = !cast("int_arm_mve_" # iname # "_n_predicated"); defvar v1 = (VTI.Vec MQPR:$v1); @@ -5628,7 +5639,7 @@ multiclass MVE_VFMA_qr_multi { - def "": MVE_VFMAMLA_qr; + def "": MVE_VFMAMLA_qr; defvar Inst = !cast(NAME); defvar pred_int = int_arm_mve_fma_predicated; defvar v1 = (VTI.Vec MQPR:$v1); @@ -5677,7 +5688,7 @@ class MVE_VQDMLAH_qr size, bit bit_5, bit bit_12, list pattern=[]> - : MVE_qDestSrc_rSrc { + : MVE_qDestSrc_rSrc { let Inst{28} = U; let Inst{21-20} = size; @@ -5722,7 +5733,7 @@ ValueType VT, SDPatternOperator vxdup> : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn), (ins tGPREven:$Rn_src, MVE_VIDUP_imm:$imm), NoItinerary, - iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src", + iname, suffix, "$Qd, $Rn, $imm", vpred_r, "$Rn = $Rn_src", size, [(set (VT MQPR:$Qd), (i32 tGPREven:$Rn), (vxdup (i32 tGPREven:$Rn_src), (i32 imm:$imm)))]> { bits<4> Qd; @@ -5757,7 +5768,7 @@ list pattern=[]> : MVE_p<(outs MQPR:$Qd, tGPREven:$Rn), (ins tGPREven:$Rn_src, tGPROdd:$Rm, MVE_VIDUP_imm:$imm), NoItinerary, - iname, suffix, "$Qd, $Rn, $Rm, $imm", vpred_r, "$Rn = $Rn_src", + iname, suffix, "$Qd, 
$Rn, $Rm, $imm", vpred_r, "$Rn = $Rn_src", size, pattern> { bits<4> Qd; bits<4> Rm; @@ -5792,7 +5803,7 @@ let isReMaterializable = 1 in class MVE_VCTPInst size, list pattern=[]> : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix, - "$Rn", vpred_n, "", pattern> { + "$Rn", vpred_n, "", size, pattern> { bits<4> Rn; let Inst{28-27} = 0b10; @@ -5849,6 +5860,7 @@ let Inst{4} = idx2; let Inst{3-0} = Rt{3-0}; + let VecSize = 0b10; let hasSideEffects = 0; } @@ -5937,7 +5949,7 @@ bit load, dag Oops, dag loadIops, dag wbIops, string iname, string ops, string cstr, list pattern=[]> - : MVE_MI { + : MVE_MI { bits<4> VQd; bits<4> Rn; @@ -6135,8 +6147,8 @@ // input values. class MVE_VLDRSTR_base pattern=[]> - : MVE_p { + string ops, string cstr, bits<2> vecsize, list pattern=[]> + : MVE_p { bits<3> Qd; let Inst{28} = U; @@ -6172,7 +6184,7 @@ class MVE_VLDRSTR_cs - : MVE_VLDRSTR_base { + : MVE_VLDRSTR_base { bits<12> addr; let Inst{23} = addr{7}; let Inst{19-16} = addr{11-8}; @@ -6187,7 +6199,7 @@ bit P, bit W, bits<2> size, dag oops, dag iops, string asm, string suffix, IndexMode im, string ops, string cstr> - : MVE_VLDRSTR_base { + : MVE_VLDRSTR_base { bits<11> addr; let Inst{23} = addr{7}; let Inst{19} = memsz.encoding{0}; // enough to tell 16- from 32-bit @@ -6304,7 +6316,7 @@ bits<2> size, bit os, string asm, string suffix, int shift> : MVE_VLDRSTR_base:$addr)), - asm, suffix, "$Qd, $addr", dir.cstr> { + asm, suffix, "$Qd, $addr", dir.cstr, size> { bits<7> addr; let Inst{23} = 0b1; let Inst{19-16} = addr{6-3}; @@ -6437,7 +6449,7 @@ string asm, string wbAsm, string suffix, string cstr = ""> : MVE_VLDRSTR_base:$addr)), - asm, suffix, "$Qd, $addr" # wbAsm, cstr # dir.cstr> { + asm, suffix, "$Qd, $addr" # wbAsm, cstr # dir.cstr, memsz.encoding> { bits<11> addr; let Inst{23} = addr{7}; let Inst{19-17} = addr{10-8}; @@ -6546,7 +6558,7 @@ // end of MVE predicable load/store class MVE_VPT size, dag iops, string asm, list pattern=[]> - : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", pattern> { + : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, "", size, pattern> { bits<3> fc; bits<4> Mk; bits<3> Qn; @@ -6656,7 +6668,7 @@ class MVE_VPTf pattern=[]> : MVE_MI<(outs ), iops, NoItinerary, !strconcat("vpt", "${Mk}", ".", suffix), asm, - "", pattern> { + "", !if(size, 0b01, 0b10), pattern> { bits<3> fc; bits<4> Mk; bits<3> Qn; @@ -6709,7 +6721,7 @@ def MVE_VPTv8f16r : MVE_VPTft2<"f16", 0b1>; def MVE_VPST : MVE_MI<(outs ), (ins vpt_mask:$Mk), NoItinerary, - !strconcat("vpst", "${Mk}"), "", "", []> { + !strconcat("vpst", "${Mk}"), "", "", 0b00, []> { bits<4> Mk; let Inst{31-23} = 0b111111100; @@ -6726,7 +6738,7 @@ } def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary, - "vpsel", "", "$Qd, $Qn, $Qm", vpred_n, "", []> { + "vpsel", "", "$Qd, $Qn, $Qm", vpred_n, "", 0b00, []> { bits<4> Qn; bits<4> Qd; bits<4> Qm; @@ -6832,7 +6844,7 @@ } def MVE_VPNOT : MVE_p<(outs VCCR:$P0), (ins VCCR:$P0_in), NoItinerary, - "vpnot", "", "", vpred_n, "", []> { + "vpnot", "", "", vpred_n, "", 0b00, []> { let Inst{31-0} = 0b11111110001100010000111101001101; let Unpredictable{19-17} = 0b111; let Unpredictable{12} = 0b1; diff --git a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp --- a/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ b/llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -97,7 +97,15 @@ return Domain == ARMII::DomainMVE; } +static int getVecSize(const MachineInstr &MI) { + 
+  const MCInstrDesc &MCID = MI.getDesc();
+  uint64_t Flags = MCID.TSFlags;
+  return (Flags & ARMII::VecSize) >> ARMII::VecSizeShift;
+}
+
 static bool shouldInspect(MachineInstr &MI) {
+  if (MI.isDebugInstr())
+    return false;
   return isDomainMVE(&MI) || isVectorPredicate(&MI) || hasVPRUse(MI);
 }
@@ -371,6 +379,7 @@
     SmallVector<MachineInstr *, 4> VCTPs;
     SmallPtrSet<MachineInstr *, 4> ToRemove;
     SmallPtrSet<MachineInstr *, 4> BlockMasksToRecompute;
+    SmallPtrSet<MachineInstr *, 4> DoubleWidthResultInstrs;
     bool Revert = false;
     bool CannotTailPredicate = false;
@@ -730,6 +739,20 @@
     return false;
   }

+  // Any DoubleWidthResultInstrs found whilst scanning the instructions must
+  // compute an output no wider than the elements the VCTP mask operates on.
+  // The VecSize of a DoubleWidthResult instruction is the larger vector size -
+  // the size it extends into - so the instruction is valid for any VCTP whose
+  // VecSize is greater than or equal to it.
+  unsigned VCTPVecSize = getVecSize(*VCTP);
+  for (MachineInstr *MI : DoubleWidthResultInstrs) {
+    unsigned InstrVecSize = getVecSize(*MI);
+    if (InstrVecSize > VCTPVecSize) {
+      LLVM_DEBUG(dbgs() << "ARM Loops: Double width result larger than VCTP "
+                        << "VecSize:\n" << *MI);
+      return false;
+    }
+  }
+
   // Check that the value change of the element count is what we expect and
   // that the predication will be equivalent. For this we need:
   // NumElements = NumElements - VectorWidth. The sub will be a sub immediate
@@ -1233,8 +1256,13 @@
   bool RequiresExplicitPredication =
     (MCID.TSFlags & ARMII::ValidForTailPredication) == 0;
   if (isDomainMVE(MI) && RequiresExplicitPredication) {
-    LLVM_DEBUG(if (!IsUse)
-               dbgs() << "ARM Loops: Can't tail predicate: " << *MI);
+    if (!IsUse && producesDoubleWidthResult(*MI)) {
+      DoubleWidthResultInstrs.insert(MI);
+      return true;
+    }
+
+    LLVM_DEBUG(if (!IsUse) dbgs()
+               << "ARM Loops: Can't tail predicate: " << *MI);
     return IsUse;
   }
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -408,6 +408,14 @@
   // its input, typically reading from the top/bottom halves of the input(s).
   DoubleWidthResult = 1 << 23,
+  // The vector element size for MVE instructions. 00 = i8, 01 = i16, 10 = i32
+  // and 11 = i64. This is the largest type if multiple are present, so an
+  // MVE_VMOVLs8bh is size 01=i16, as it extends from an i8 to an i16. There
+  // are some caveats, so it cannot be used blindly; for example, exchanging
+  // VMLADAVs and complex instructions may use different input lanes.
+  VecSizeShift = 24,
+  VecSize = 3 << VecSizeShift,
+
   //===------------------------------------------------------------------===//
   // Code domain.
DomainShift = 15, diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll --- a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll @@ -10,22 +10,13 @@ ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB0_1: @ %vector.ph -; CHECK-NEXT: adds r3, r2, #3 -; CHECK-NEXT: bic r3, r3, #3 -; CHECK-NEXT: sub.w r12, r3, #4 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #2 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dlstp.32 lr, r2 ; CHECK-NEXT: .LBB0_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r2 -; CHECK-NEXT: subs r2, #4 -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrwt.u32 q0, [r1], #16 +; CHECK-NEXT: vldrw.u32 q0, [r1], #16 ; CHECK-NEXT: vmovlb.s16 q0, q0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrwt.32 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB0_2 +; CHECK-NEXT: vstrw.32 q0, [r0], #16 +; CHECK-NEXT: letp lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: @@ -66,22 +57,13 @@ ; CHECK-NEXT: it lt ; CHECK-NEXT: poplt {r7, pc} ; CHECK-NEXT: .LBB1_1: @ %vector.ph -; CHECK-NEXT: adds r3, r2, #7 -; CHECK-NEXT: bic r3, r3, #7 -; CHECK-NEXT: sub.w r12, r3, #8 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r3, r3, r12, lsr #3 -; CHECK-NEXT: dls lr, r3 +; CHECK-NEXT: dlstp.16 lr, r2 ; CHECK-NEXT: .LBB1_2: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.16 r2 -; CHECK-NEXT: subs r2, #8 -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrht.u16 q0, [r1], #16 +; CHECK-NEXT: vldrh.u16 q0, [r1], #16 ; CHECK-NEXT: vmovlb.u8 q0, q0 -; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r0], #16 -; CHECK-NEXT: le lr, .LBB1_2 +; CHECK-NEXT: vstrh.16 q0, [r0], #16 +; CHECK-NEXT: letp lr, .LBB1_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} entry: @@ -174,35 +156,22 @@ ; CHECK-NEXT: push {r7, lr} ; CHECK-NEXT: ldrsh.w r1, [sp, #8] ; CHECK-NEXT: vmov.i16 q0, #0x100 -; CHECK-NEXT: cmp r1, #8 -; CHECK-NEXT: mov r3, r1 -; CHECK-NEXT: it ge -; CHECK-NEXT: movge r3, #8 ; CHECK-NEXT: vldrb.u16 q1, [r2], #8 -; CHECK-NEXT: subs r3, r1, r3 ; CHECK-NEXT: vldrb.u16 q2, [r0], #8 -; CHECK-NEXT: add.w r12, r3, #7 -; CHECK-NEXT: movs r3, #1 -; CHECK-NEXT: add.w r12, r3, r12, lsr #3 ; CHECK-NEXT: ldr r3, [sp, #12] -; CHECK-NEXT: dls lr, r12 +; CHECK-NEXT: dlstp.16 lr, r1 ; CHECK-NEXT: .LBB3_1: @ %do.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.16 r1 -; CHECK-NEXT: subs r1, #8 ; CHECK-NEXT: vmovlb.u8 q1, q1 -; CHECK-NEXT: vpst -; CHECK-NEXT: vsubt.i16 q3, q0, q1 +; CHECK-NEXT: vsub.i16 q3, q0, q1 ; CHECK-NEXT: vmovlb.u8 q2, q2 -; CHECK-NEXT: vpstttt -; CHECK-NEXT: vmult.i16 q3, q2, q3 -; CHECK-NEXT: vmlat.u16 q3, q1, r3 -; CHECK-NEXT: vshrt.u16 q3, q3, #8 -; CHECK-NEXT: vldrbt.u16 q1, [r2], #8 -; CHECK-NEXT: vpstt -; CHECK-NEXT: vldrbt.u16 q2, [r0], #8 -; CHECK-NEXT: vstrbt.16 q3, [r0, #-16] -; CHECK-NEXT: le lr, .LBB3_1 +; CHECK-NEXT: vmul.i16 q3, q2, q3 +; CHECK-NEXT: vmla.u16 q3, q1, r3 +; CHECK-NEXT: vshr.u16 q3, q3, #8 +; CHECK-NEXT: vldrb.u16 q1, [r2], #8 +; CHECK-NEXT: vldrb.u16 q2, [r0], #8 +; CHECK-NEXT: vstrb.16 q3, [r0, #-16] +; CHECK-NEXT: letp lr, .LBB3_1 ; CHECK-NEXT: @ %bb.2: @ %do.end ; CHECK-NEXT: pop {r7, pc} entry: diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp --- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp +++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp @@ -1216,3 +1216,886 @@ << 
 MII->getName(Op) << " has unexpected side effects";
   }
 }
+
+TEST(MachineInstr, MVEVecSize) {
+  using namespace ARM;
+  auto MVEVecSize = [](unsigned Opcode) {
+    switch (Opcode) {
+    default:
+      dbgs() << Opcode << "\n";
+      llvm_unreachable("Unexpected MVE instruction!");
+    case MVE_ASRLi:
+    case MVE_ASRLr:
+    case MVE_LSLLi:
+    case MVE_LSLLr:
+    case MVE_LSRL:
+    case MVE_SQRSHR:
+    case MVE_SQRSHRL:
+    case MVE_SQSHL:
+    case MVE_SQSHLL:
+    case MVE_SRSHR:
+    case MVE_SRSHRL:
+    case MVE_UQRSHL:
+    case MVE_UQRSHLL:
+    case MVE_UQSHL:
+    case MVE_UQSHLL:
+    case MVE_URSHR:
+    case MVE_URSHRL:
+    case MVE_VABAVs8:
+    case MVE_VABAVu8:
+    case MVE_VABDs8:
+    case MVE_VABDu8:
+    case MVE_VABSs8:
+    case MVE_VADDVs8acc:
+    case MVE_VADDVs8no_acc:
+    case MVE_VADDVu8acc:
+    case MVE_VADDVu8no_acc:
+    case MVE_VADD_qr_i8:
+    case MVE_VADDi8:
+    case MVE_VBRSR8:
+    case MVE_VCADDi8:
+    case MVE_VCLSs8:
+    case MVE_VCLZs8:
+    case MVE_VCMPi8:
+    case MVE_VCMPi8r:
+    case MVE_VCMPs8:
+    case MVE_VCMPs8r:
+    case MVE_VCMPu8:
+    case MVE_VCMPu8r:
+    case MVE_VCTP8:
+    case MVE_VDDUPu8:
+    case MVE_VDUP8:
+    case MVE_VDWDUPu8:
+    case MVE_VHADD_qr_s8:
+    case MVE_VHADD_qr_u8:
+    case MVE_VHADDs8:
+    case MVE_VHADDu8:
+    case MVE_VHCADDs8:
+    case MVE_VHSUB_qr_s8:
+    case MVE_VHSUB_qr_u8:
+    case MVE_VHSUBs8:
+    case MVE_VHSUBu8:
+    case MVE_VIDUPu8:
+    case MVE_VIWDUPu8:
+    case MVE_VLD20_8:
+    case MVE_VLD20_8_wb:
+    case MVE_VLD21_8:
+    case MVE_VLD21_8_wb:
+    case MVE_VLD40_8:
+    case MVE_VLD40_8_wb:
+    case MVE_VLD41_8:
+    case MVE_VLD41_8_wb:
+    case MVE_VLD42_8:
+    case MVE_VLD42_8_wb:
+    case MVE_VLD43_8:
+    case MVE_VLD43_8_wb:
+    case MVE_VLDRBU8:
+    case MVE_VLDRBU8_post:
+    case MVE_VLDRBU8_pre:
+    case MVE_VLDRBU8_rq:
+    case MVE_VMAXAVs8:
+    case MVE_VMAXAs8:
+    case MVE_VMAXVs8:
+    case MVE_VMAXVu8:
+    case MVE_VMAXs8:
+    case MVE_VMAXu8:
+    case MVE_VMINAVs8:
+    case MVE_VMINAs8:
+    case MVE_VMINVs8:
+    case MVE_VMINVu8:
+    case MVE_VMINs8:
+    case MVE_VMINu8:
+    case MVE_VMLADAVas8:
+    case MVE_VMLADAVau8:
+    case MVE_VMLADAVaxs8:
+    case MVE_VMLADAVs8:
+    case MVE_VMLADAVu8:
+    case MVE_VMLADAVxs8:
+    case MVE_VMLAS_qr_s8:
+    case MVE_VMLAS_qr_u8:
+    case MVE_VMLA_qr_s8:
+    case MVE_VMLA_qr_u8:
+    case MVE_VMLSDAVas8:
+    case MVE_VMLSDAVaxs8:
+    case MVE_VMLSDAVs8:
+    case MVE_VMLSDAVxs8:
+    case MVE_VMOV_from_lane_s8:
+    case MVE_VMOV_from_lane_u8:
+    case MVE_VMOV_to_lane_8:
+    case MVE_VMOVimmi8:
+    case MVE_VMULHs8:
+    case MVE_VMULHu8:
+    case MVE_VMUL_qr_i8:
+    case MVE_VMULi8:
+    case MVE_VNEGs8:
+    case MVE_VPTv16i8:
+    case MVE_VPTv16i8r:
+    case MVE_VPTv16s8:
+    case MVE_VPTv16s8r:
+    case MVE_VPTv16u8:
+    case MVE_VPTv16u8r:
+    case MVE_VQABSs8:
+    case MVE_VQADD_qr_s8:
+    case MVE_VQADD_qr_u8:
+    case MVE_VQADDs8:
+    case MVE_VQADDu8:
+    case MVE_VQDMLADHXs8:
+    case MVE_VQDMLADHs8:
+    case MVE_VQDMLAH_qrs8:
+    case MVE_VQDMLASH_qrs8:
+    case MVE_VQDMLSDHXs8:
+    case MVE_VQDMLSDHs8:
+    case MVE_VQDMULH_qr_s8:
+    case MVE_VQDMULHi8:
+    case MVE_VQNEGs8:
+    case MVE_VQRDMLADHXs8:
+    case MVE_VQRDMLADHs8:
+    case MVE_VQRDMLAH_qrs8:
+    case MVE_VQRDMLASH_qrs8:
+    case MVE_VQRDMLSDHXs8:
+    case MVE_VQRDMLSDHs8:
+    case MVE_VQRDMULH_qr_s8:
+    case MVE_VQRDMULHi8:
+    case MVE_VQRSHL_by_vecs8:
+    case MVE_VQRSHL_by_vecu8:
+    case MVE_VQRSHL_qrs8:
+    case MVE_VQRSHL_qru8:
+    case MVE_VQSHLU_imms8:
+    case MVE_VQSHL_by_vecs8:
+    case MVE_VQSHL_by_vecu8:
+    case MVE_VQSHL_qrs8:
+    case MVE_VQSHL_qru8:
+    case MVE_VQSHLimms8:
+    case MVE_VQSHLimmu8:
+    case MVE_VQSUB_qr_s8:
+    case MVE_VQSUB_qr_u8:
+    case MVE_VQSUBs8:
+    case MVE_VQSUBu8:
+    case MVE_VRHADDs8:
+    case MVE_VRHADDu8:
+    case MVE_VRMULHs8:
+    case MVE_VRMULHu8:
+    case MVE_VRSHL_by_vecs8:
+    case MVE_VRSHL_by_vecu8:
+    case MVE_VRSHL_qrs8:
+    case MVE_VRSHL_qru8:
+    case MVE_VRSHR_imms8:
+    case MVE_VRSHR_immu8:
+    case MVE_VSHL_by_vecs8:
+    case MVE_VSHL_by_vecu8:
+    case MVE_VSHL_immi8:
+    case MVE_VSHL_qru8:
+    case MVE_VSHL_qrs8:
+    case MVE_VSHR_imms8:
+    case MVE_VSHR_immu8:
+    case MVE_VSLIimm8:
+    case MVE_VSRIimm8:
+    case MVE_VST20_8:
+    case MVE_VST20_8_wb:
+    case MVE_VST21_8:
+    case MVE_VST21_8_wb:
+    case MVE_VST40_8:
+    case MVE_VST40_8_wb:
+    case MVE_VST41_8:
+    case MVE_VST41_8_wb:
+    case MVE_VST42_8:
+    case MVE_VST42_8_wb:
+    case MVE_VST43_8:
+    case MVE_VST43_8_wb:
+    case MVE_VSTRB8_rq:
+    case MVE_VSTRBU8:
+    case MVE_VSTRBU8_post:
+    case MVE_VSTRBU8_pre:
+    case MVE_VSUB_qr_i8:
+    case MVE_VSUBi8:
+    case MVE_VAND:
+    case MVE_VBIC:
+    case MVE_VEOR:
+    case MVE_VMVN:
+    case MVE_VORN:
+    case MVE_VORR:
+    case MVE_VPNOT:
+    case MVE_VPSEL:
+    case MVE_VPST:
+      return 0;
+    case MVE_VABAVs16:
+    case MVE_VABAVu16:
+    case MVE_VABDf16:
+    case MVE_VABDs16:
+    case MVE_VABDu16:
+    case MVE_VABSf16:
+    case MVE_VABSs16:
+    case MVE_VADDVs16acc:
+    case MVE_VADDVs16no_acc:
+    case MVE_VADDVu16acc:
+    case MVE_VADDVu16no_acc:
+    case MVE_VADD_qr_f16:
+    case MVE_VADD_qr_i16:
+    case MVE_VADDf16:
+    case MVE_VADDi16:
+    case MVE_VBICimmi16:
+    case MVE_VBRSR16:
+    case MVE_VCADDf16:
+    case MVE_VCADDi16:
+    case MVE_VCLSs16:
+    case MVE_VCLZs16:
+    case MVE_VCMLAf16:
+    case MVE_VCMPf16:
+    case MVE_VCMPf16r:
+    case MVE_VCMPi16:
+    case MVE_VCMPi16r:
+    case MVE_VCMPs16:
+    case MVE_VCMPs16r:
+    case MVE_VCMPu16:
+    case MVE_VCMPu16r:
+    case MVE_VCMULf16:
+    case MVE_VCTP16:
+    case MVE_VCVTf16s16_fix:
+    case MVE_VCVTf16s16n:
+    case MVE_VCVTf16u16_fix:
+    case MVE_VCVTf16u16n:
+    case MVE_VCVTs16f16_fix:
+    case MVE_VCVTs16f16a:
+    case MVE_VCVTs16f16m:
+    case MVE_VCVTs16f16n:
+    case MVE_VCVTs16f16p:
+    case MVE_VCVTs16f16z:
+    case MVE_VCVTu16f16_fix:
+    case MVE_VCVTu16f16a:
+    case MVE_VCVTu16f16m:
+    case MVE_VCVTu16f16n:
+    case MVE_VCVTu16f16p:
+    case MVE_VCVTu16f16z:
+    case MVE_VDDUPu16:
+    case MVE_VDUP16:
+    case MVE_VDWDUPu16:
+    case MVE_VFMA_qr_Sf16:
+    case MVE_VFMA_qr_f16:
+    case MVE_VFMAf16:
+    case MVE_VFMSf16:
+    case MVE_VHADD_qr_s16:
+    case MVE_VHADD_qr_u16:
+    case MVE_VHADDs16:
+    case MVE_VHADDu16:
+    case MVE_VHCADDs16:
+    case MVE_VHSUB_qr_s16:
+    case MVE_VHSUB_qr_u16:
+    case MVE_VHSUBs16:
+    case MVE_VHSUBu16:
+    case MVE_VIDUPu16:
+    case MVE_VIWDUPu16:
+    case MVE_VLD20_16:
+    case MVE_VLD20_16_wb:
+    case MVE_VLD21_16:
+    case MVE_VLD21_16_wb:
+    case MVE_VLD40_16:
+    case MVE_VLD40_16_wb:
+    case MVE_VLD41_16:
+    case MVE_VLD41_16_wb:
+    case MVE_VLD42_16:
+    case MVE_VLD42_16_wb:
+    case MVE_VLD43_16:
+    case MVE_VLD43_16_wb:
+    case MVE_VLDRBS16:
+    case MVE_VLDRBS16_post:
+    case MVE_VLDRBS16_pre:
+    case MVE_VLDRBS16_rq:
+    case MVE_VLDRBU16:
+    case MVE_VLDRBU16_post:
+    case MVE_VLDRBU16_pre:
+    case MVE_VLDRBU16_rq:
+    case MVE_VLDRHU16:
+    case MVE_VLDRHU16_post:
+    case MVE_VLDRHU16_pre:
+    case MVE_VLDRHU16_rq:
+    case MVE_VLDRHU16_rq_u:
+    case MVE_VMAXAVs16:
+    case MVE_VMAXAs16:
+    case MVE_VMAXNMAVf16:
+    case MVE_VMAXNMAf16:
+    case MVE_VMAXNMVf16:
+    case MVE_VMAXNMf16:
+    case MVE_VMAXVs16:
+    case MVE_VMAXVu16:
+    case MVE_VMAXs16:
+    case MVE_VMAXu16:
+    case MVE_VMINAVs16:
+    case MVE_VMINAs16:
+    case MVE_VMINNMAVf16:
+    case MVE_VMINNMAf16:
+    case MVE_VMINNMVf16:
+    case MVE_VMINNMf16:
+    case MVE_VMINVs16:
+    case MVE_VMINVu16:
+    case MVE_VMINs16:
+    case MVE_VMINu16:
+    case MVE_VMLADAVas16:
+    case MVE_VMLADAVau16:
+    case MVE_VMLADAVaxs16:
+    case MVE_VMLADAVs16:
+    case MVE_VMLADAVu16:
+    case MVE_VMLADAVxs16:
+    case MVE_VMLALDAVas16:
+    case MVE_VMLALDAVau16:
+    case MVE_VMLALDAVaxs16:
+    case MVE_VMLALDAVs16:
+    case MVE_VMLALDAVu16:
+    case MVE_VMLALDAVxs16:
+    case MVE_VMLAS_qr_s16:
+    case MVE_VMLAS_qr_u16:
+    case MVE_VMLA_qr_s16:
+    case MVE_VMLA_qr_u16:
+    case MVE_VMLSDAVas16:
+    case MVE_VMLSDAVaxs16:
+    case MVE_VMLSDAVs16:
+    case MVE_VMLSDAVxs16:
+    case MVE_VMLSLDAVas16:
+    case MVE_VMLSLDAVaxs16:
+    case MVE_VMLSLDAVs16:
+    case MVE_VMLSLDAVxs16:
+    case MVE_VMOVNi16bh:
+    case MVE_VMOVNi16th:
+    case MVE_VMOV_from_lane_s16:
+    case MVE_VMOV_from_lane_u16:
+    case MVE_VMOV_to_lane_16:
+    case MVE_VMOVimmi16:
+    case MVE_VMOVLs8bh:
+    case MVE_VMOVLs8th:
+    case MVE_VMOVLu8bh:
+    case MVE_VMOVLu8th:
+    case MVE_VMULLBp8:
+    case MVE_VMULLBs8:
+    case MVE_VMULLBu8:
+    case MVE_VMULLTp8:
+    case MVE_VMULLTs8:
+    case MVE_VMULLTu8:
+    case MVE_VMULHs16:
+    case MVE_VMULHu16:
+    case MVE_VMUL_qr_f16:
+    case MVE_VMUL_qr_i16:
+    case MVE_VMULf16:
+    case MVE_VMULi16:
+    case MVE_VMVNimmi16:
+    case MVE_VNEGf16:
+    case MVE_VNEGs16:
+    case MVE_VORRimmi16:
+    case MVE_VPTv8f16:
+    case MVE_VPTv8f16r:
+    case MVE_VPTv8i16:
+    case MVE_VPTv8i16r:
+    case MVE_VPTv8s16:
+    case MVE_VPTv8s16r:
+    case MVE_VPTv8u16:
+    case MVE_VPTv8u16r:
+    case MVE_VQABSs16:
+    case MVE_VQADD_qr_s16:
+    case MVE_VQADD_qr_u16:
+    case MVE_VQADDs16:
+    case MVE_VQADDu16:
+    case MVE_VQDMLADHXs16:
+    case MVE_VQDMLADHs16:
+    case MVE_VQDMLAH_qrs16:
+    case MVE_VQDMLASH_qrs16:
+    case MVE_VQDMLSDHXs16:
+    case MVE_VQDMLSDHs16:
+    case MVE_VQDMULH_qr_s16:
+    case MVE_VQDMULHi16:
+    case MVE_VQDMULL_qr_s16bh:
+    case MVE_VQDMULL_qr_s16th:
+    case MVE_VQDMULLs16bh:
+    case MVE_VQDMULLs16th:
+    case MVE_VQMOVNs16bh:
+    case MVE_VQMOVNs16th:
+    case MVE_VQMOVNu16bh:
+    case MVE_VQMOVNu16th:
+    case MVE_VQMOVUNs16bh:
+    case MVE_VQMOVUNs16th:
+    case MVE_VQNEGs16:
+    case MVE_VQRDMLADHXs16:
+    case MVE_VQRDMLADHs16:
+    case MVE_VQRDMLAH_qrs16:
+    case MVE_VQRDMLASH_qrs16:
+    case MVE_VQRDMLSDHXs16:
+    case MVE_VQRDMLSDHs16:
+    case MVE_VQRDMULH_qr_s16:
+    case MVE_VQRDMULHi16:
+    case MVE_VQRSHL_by_vecs16:
+    case MVE_VQRSHL_by_vecu16:
+    case MVE_VQRSHL_qrs16:
+    case MVE_VQRSHL_qru16:
+    case MVE_VQRSHRNbhs16:
+    case MVE_VQRSHRNbhu16:
+    case MVE_VQRSHRNths16:
+    case MVE_VQRSHRNthu16:
+    case MVE_VQRSHRUNs16bh:
+    case MVE_VQRSHRUNs16th:
+    case MVE_VQSHLU_imms16:
+    case MVE_VQSHL_by_vecs16:
+    case MVE_VQSHL_by_vecu16:
+    case MVE_VQSHL_qrs16:
+    case MVE_VQSHL_qru16:
+    case MVE_VQSHLimms16:
+    case MVE_VQSHLimmu16:
+    case MVE_VQSHRNbhs16:
+    case MVE_VQSHRNbhu16:
+    case MVE_VQSHRNths16:
+    case MVE_VQSHRNthu16:
+    case MVE_VQSHRUNs16bh:
+    case MVE_VQSHRUNs16th:
+    case MVE_VQSUB_qr_s16:
+    case MVE_VQSUB_qr_u16:
+    case MVE_VQSUBs16:
+    case MVE_VQSUBu16:
+    case MVE_VREV16_8:
+    case MVE_VRHADDs16:
+    case MVE_VRHADDu16:
+    case MVE_VRINTf16A:
+    case MVE_VRINTf16M:
+    case MVE_VRINTf16N:
+    case MVE_VRINTf16P:
+    case MVE_VRINTf16X:
+    case MVE_VRINTf16Z:
+    case MVE_VRMULHs16:
+    case MVE_VRMULHu16:
+    case MVE_VRSHL_by_vecs16:
+    case MVE_VRSHL_by_vecu16:
+    case MVE_VRSHL_qrs16:
+    case MVE_VRSHL_qru16:
+    case MVE_VRSHRNi16bh:
+    case MVE_VRSHRNi16th:
+    case MVE_VRSHR_imms16:
+    case MVE_VRSHR_immu16:
+    case MVE_VSHLL_imms8bh:
+    case MVE_VSHLL_imms8th:
+    case MVE_VSHLL_immu8bh:
+    case MVE_VSHLL_immu8th:
+    case MVE_VSHLL_lws8bh:
+    case MVE_VSHLL_lws8th:
+    case MVE_VSHLL_lwu8bh:
+    case MVE_VSHLL_lwu8th:
+    case MVE_VSHL_by_vecs16:
+    case MVE_VSHL_by_vecu16:
+    case MVE_VSHL_immi16:
+    case MVE_VSHL_qrs16:
+    case MVE_VSHL_qru16:
+    case MVE_VSHRNi16bh:
+    case MVE_VSHRNi16th:
+    case MVE_VSHR_imms16:
+    case MVE_VSHR_immu16:
+    case MVE_VSLIimm16:
+    case MVE_VSRIimm16:
+    case MVE_VST20_16:
+    case MVE_VST20_16_wb:
+    case MVE_VST21_16:
+    case MVE_VST21_16_wb:
+    case MVE_VST40_16:
+    case MVE_VST40_16_wb:
+    case MVE_VST41_16:
+    case MVE_VST41_16_wb:
+    case MVE_VST42_16:
+    case MVE_VST42_16_wb:
+    case MVE_VST43_16:
+    case MVE_VST43_16_wb:
+    case MVE_VSTRB16:
+    case MVE_VSTRB16_post:
+    case MVE_VSTRB16_pre:
+    case MVE_VSTRB16_rq:
+    case MVE_VSTRH16_rq:
+    case MVE_VSTRH16_rq_u:
+    case MVE_VSTRHU16:
+    case MVE_VSTRHU16_post:
+    case MVE_VSTRHU16_pre:
+    case MVE_VSUB_qr_f16:
+    case MVE_VSUB_qr_i16:
+    case MVE_VSUBf16:
+    case MVE_VSUBi16:
+      return 1;
+    case MVE_VABAVs32:
+    case MVE_VABAVu32:
+    case MVE_VABDf32:
+    case MVE_VABDs32:
+    case MVE_VABDu32:
+    case MVE_VABSf32:
+    case MVE_VABSs32:
+    case MVE_VADC:
+    case MVE_VADCI:
+    case MVE_VADDLVs32acc:
+    case MVE_VADDLVs32no_acc:
+    case MVE_VADDLVu32acc:
+    case MVE_VADDLVu32no_acc:
+    case MVE_VADDVs32acc:
+    case MVE_VADDVs32no_acc:
+    case MVE_VADDVu32acc:
+    case MVE_VADDVu32no_acc:
+    case MVE_VADD_qr_f32:
+    case MVE_VADD_qr_i32:
+    case MVE_VADDf32:
+    case MVE_VADDi32:
+    case MVE_VBICimmi32:
+    case MVE_VBRSR32:
+    case MVE_VCADDf32:
+    case MVE_VCADDi32:
+    case MVE_VCLSs32:
+    case MVE_VCLZs32:
+    case MVE_VCMLAf32:
+    case MVE_VCMPf32:
+    case MVE_VCMPf32r:
+    case MVE_VCMPi32:
+    case MVE_VCMPi32r:
+    case MVE_VCMPs32:
+    case MVE_VCMPs32r:
+    case MVE_VCMPu32:
+    case MVE_VCMPu32r:
+    case MVE_VCMULf32:
+    case MVE_VCTP32:
+    case MVE_VCVTf16f32bh:
+    case MVE_VCVTf16f32th:
+    case MVE_VCVTf32f16bh:
+    case MVE_VCVTf32f16th:
+    case MVE_VCVTf32s32_fix:
+    case MVE_VCVTf32s32n:
+    case MVE_VCVTf32u32_fix:
+    case MVE_VCVTf32u32n:
+    case MVE_VCVTs32f32_fix:
+    case MVE_VCVTs32f32a:
+    case MVE_VCVTs32f32m:
+    case MVE_VCVTs32f32n:
+    case MVE_VCVTs32f32p:
+    case MVE_VCVTs32f32z:
+    case MVE_VCVTu32f32_fix:
+    case MVE_VCVTu32f32a:
+    case MVE_VCVTu32f32m:
+    case MVE_VCVTu32f32n:
+    case MVE_VCVTu32f32p:
+    case MVE_VCVTu32f32z:
+    case MVE_VDDUPu32:
+    case MVE_VDUP32:
+    case MVE_VDWDUPu32:
+    case MVE_VFMA_qr_Sf32:
+    case MVE_VFMA_qr_f32:
+    case MVE_VFMAf32:
+    case MVE_VFMSf32:
+    case MVE_VHADD_qr_s32:
+    case MVE_VHADD_qr_u32:
+    case MVE_VHADDs32:
+    case MVE_VHADDu32:
+    case MVE_VHCADDs32:
+    case MVE_VHSUB_qr_s32:
+    case MVE_VHSUB_qr_u32:
+    case MVE_VHSUBs32:
+    case MVE_VHSUBu32:
+    case MVE_VIDUPu32:
+    case MVE_VIWDUPu32:
+    case MVE_VLD20_32:
+    case MVE_VLD20_32_wb:
+    case MVE_VLD21_32:
+    case MVE_VLD21_32_wb:
+    case MVE_VLD40_32:
+    case MVE_VLD40_32_wb:
+    case MVE_VLD41_32:
+    case MVE_VLD41_32_wb:
+    case MVE_VLD42_32:
+    case MVE_VLD42_32_wb:
+    case MVE_VLD43_32:
+    case MVE_VLD43_32_wb:
+    case MVE_VLDRBS32:
+    case MVE_VLDRBS32_post:
+    case MVE_VLDRBS32_pre:
+    case MVE_VLDRBS32_rq:
+    case MVE_VLDRBU32:
+    case MVE_VLDRBU32_post:
+    case MVE_VLDRBU32_pre:
+    case MVE_VLDRBU32_rq:
+    case MVE_VLDRHS32:
+    case MVE_VLDRHS32_post:
+    case MVE_VLDRHS32_pre:
+    case MVE_VLDRHS32_rq:
+    case MVE_VLDRHS32_rq_u:
+    case MVE_VLDRHU32:
+    case MVE_VLDRHU32_post:
+    case MVE_VLDRHU32_pre:
+    case MVE_VLDRHU32_rq:
+    case MVE_VLDRHU32_rq_u:
+    case MVE_VLDRWU32:
+    case MVE_VLDRWU32_post:
+    case MVE_VLDRWU32_pre:
+    case MVE_VLDRWU32_qi:
+    case MVE_VLDRWU32_qi_pre:
+    case MVE_VLDRWU32_rq:
+    case MVE_VLDRWU32_rq_u:
+    case MVE_VMAXAVs32:
+    case MVE_VMAXAs32:
+    case MVE_VMAXNMAVf32:
+    case MVE_VMAXNMAf32:
+    case MVE_VMAXNMVf32:
+    case MVE_VMAXNMf32:
+    case MVE_VMAXVs32:
+    case MVE_VMAXVu32:
+    case MVE_VMAXs32:
+    case MVE_VMAXu32:
+    case MVE_VMINAVs32:
+    case MVE_VMINAs32:
+    case MVE_VMINNMAVf32:
+    case MVE_VMINNMAf32:
+    case MVE_VMINNMVf32:
+    case MVE_VMINNMf32:
+    case MVE_VMINVs32:
+    case MVE_VMINVu32:
+    case MVE_VMINs32:
+    case MVE_VMINu32:
+    case MVE_VMLADAVas32:
+    case MVE_VMLADAVau32:
+    case MVE_VMLADAVaxs32:
+    case MVE_VMLADAVs32:
+    case MVE_VMLADAVu32:
+    case MVE_VMLADAVxs32:
+    case MVE_VMLALDAVas32:
+    case MVE_VMLALDAVau32:
+    case MVE_VMLALDAVaxs32:
+    case MVE_VMLALDAVs32:
+    case MVE_VMLALDAVu32:
+    case MVE_VMLALDAVxs32:
+    case MVE_VMLAS_qr_s32:
+    case MVE_VMLAS_qr_u32:
+    case MVE_VMLA_qr_s32:
+    case MVE_VMLA_qr_u32:
+    case MVE_VMLSDAVas32:
+    case MVE_VMLSDAVaxs32:
+    case MVE_VMLSDAVs32:
+    case MVE_VMLSDAVxs32:
+    case MVE_VMLSLDAVas32:
+    case MVE_VMLSLDAVaxs32:
+    case MVE_VMLSLDAVs32:
+    case MVE_VMLSLDAVxs32:
+    case MVE_VMOVNi32bh:
+    case MVE_VMOVNi32th:
+    case MVE_VMOV_from_lane_32:
+    case MVE_VMOV_q_rr:
+    case MVE_VMOV_rr_q:
+    case MVE_VMOV_to_lane_32:
+    case MVE_VMOVimmf32:
+    case MVE_VMOVimmi32:
+    case MVE_VMOVLs16bh:
+    case MVE_VMOVLs16th:
+    case MVE_VMOVLu16bh:
+    case MVE_VMOVLu16th:
+    case MVE_VMULHs32:
+    case MVE_VMULHu32:
+    case MVE_VMULLBp16:
+    case MVE_VMULLBs16:
+    case MVE_VMULLBu16:
+    case MVE_VMULLTp16:
+    case MVE_VMULLTs16:
+    case MVE_VMULLTu16:
+    case MVE_VMUL_qr_f32:
+    case MVE_VMUL_qr_i32:
+    case MVE_VMULf32:
+    case MVE_VMULi32:
+    case MVE_VMVNimmi32:
+    case MVE_VNEGf32:
+    case MVE_VNEGs32:
+    case MVE_VORRimmi32:
+    case MVE_VPTv4f32:
+    case MVE_VPTv4f32r:
+    case MVE_VPTv4i32:
+    case MVE_VPTv4i32r:
+    case MVE_VPTv4s32:
+    case MVE_VPTv4s32r:
+    case MVE_VPTv4u32:
+    case MVE_VPTv4u32r:
+    case MVE_VQABSs32:
+    case MVE_VQADD_qr_s32:
+    case MVE_VQADD_qr_u32:
+    case MVE_VQADDs32:
+    case MVE_VQADDu32:
+    case MVE_VQDMLADHXs32:
+    case MVE_VQDMLADHs32:
+    case MVE_VQDMLAH_qrs32:
+    case MVE_VQDMLASH_qrs32:
+    case MVE_VQDMLSDHXs32:
+    case MVE_VQDMLSDHs32:
+    case MVE_VQDMULH_qr_s32:
+    case MVE_VQDMULHi32:
+    case MVE_VQDMULL_qr_s32bh:
+    case MVE_VQDMULL_qr_s32th:
+    case MVE_VQDMULLs32bh:
+    case MVE_VQDMULLs32th:
+    case MVE_VQMOVNs32bh:
+    case MVE_VQMOVNs32th:
+    case MVE_VQMOVNu32bh:
+    case MVE_VQMOVNu32th:
+    case MVE_VQMOVUNs32bh:
+    case MVE_VQMOVUNs32th:
+    case MVE_VQNEGs32:
+    case MVE_VQRDMLADHXs32:
+    case MVE_VQRDMLADHs32:
+    case MVE_VQRDMLAH_qrs32:
+    case MVE_VQRDMLASH_qrs32:
+    case MVE_VQRDMLSDHXs32:
+    case MVE_VQRDMLSDHs32:
+    case MVE_VQRDMULH_qr_s32:
+    case MVE_VQRDMULHi32:
+    case MVE_VQRSHL_by_vecs32:
+    case MVE_VQRSHL_by_vecu32:
+    case MVE_VQRSHL_qrs32:
+    case MVE_VQRSHL_qru32:
+    case MVE_VQRSHRNbhs32:
+    case MVE_VQRSHRNbhu32:
+    case MVE_VQRSHRNths32:
+    case MVE_VQRSHRNthu32:
+    case MVE_VQRSHRUNs32bh:
+    case MVE_VQRSHRUNs32th:
+    case MVE_VQSHLU_imms32:
+    case MVE_VQSHL_by_vecs32:
+    case MVE_VQSHL_by_vecu32:
+    case MVE_VQSHL_qrs32:
+    case MVE_VQSHL_qru32:
+    case MVE_VQSHLimms32:
+    case MVE_VQSHLimmu32:
+    case MVE_VQSHRNbhs32:
+    case MVE_VQSHRNbhu32:
+    case MVE_VQSHRNths32:
+    case MVE_VQSHRNthu32:
+    case MVE_VQSHRUNs32bh:
+    case MVE_VQSHRUNs32th:
+    case MVE_VQSUB_qr_s32:
+    case MVE_VQSUB_qr_u32:
+    case MVE_VQSUBs32:
+    case MVE_VQSUBu32:
+    case MVE_VREV32_16:
+    case MVE_VREV32_8:
+    case MVE_VRHADDs32:
+    case MVE_VRHADDu32:
+    case MVE_VRINTf32A:
+    case MVE_VRINTf32M:
+    case MVE_VRINTf32N:
+    case MVE_VRINTf32P:
+    case MVE_VRINTf32X:
+    case MVE_VRINTf32Z:
+    case MVE_VRMLALDAVHas32:
+    case MVE_VRMLALDAVHau32:
+    case MVE_VRMLALDAVHaxs32:
+    case MVE_VRMLALDAVHs32:
+    case MVE_VRMLALDAVHu32:
+    case MVE_VRMLALDAVHxs32:
+    case MVE_VRMLSLDAVHas32:
+    case MVE_VRMLSLDAVHaxs32:
+    case MVE_VRMLSLDAVHs32:
+    case MVE_VRMLSLDAVHxs32:
+    case MVE_VRMULHs32:
+    case MVE_VRMULHu32:
+    case MVE_VRSHL_by_vecs32:
+    case MVE_VRSHL_by_vecu32:
+    case MVE_VRSHL_qrs32:
+    case MVE_VRSHL_qru32:
+    case MVE_VRSHRNi32bh:
+    case MVE_VRSHRNi32th:
+    case MVE_VRSHR_imms32:
+    case MVE_VRSHR_immu32:
+    case MVE_VSBC:
+    case MVE_VSBCI:
+    case MVE_VSHLC:
+    case MVE_VSHLL_imms16bh:
+    case MVE_VSHLL_imms16th:
+    case MVE_VSHLL_immu16bh:
+    case MVE_VSHLL_immu16th:
+    case MVE_VSHLL_lws16bh:
+    case MVE_VSHLL_lws16th:
+    case MVE_VSHLL_lwu16bh:
+    case MVE_VSHLL_lwu16th:
+    case MVE_VSHL_by_vecs32:
+    case MVE_VSHL_by_vecu32:
+    case MVE_VSHL_immi32:
+    case MVE_VSHL_qrs32:
+    case MVE_VSHL_qru32:
+    case MVE_VSHRNi32bh:
+    case MVE_VSHRNi32th:
+    case MVE_VSHR_imms32:
+    case MVE_VSHR_immu32:
+    case MVE_VSLIimm32:
+    case MVE_VSRIimm32:
+    case MVE_VST20_32:
+    case MVE_VST20_32_wb:
+    case MVE_VST21_32:
+    case MVE_VST21_32_wb:
+    case MVE_VST40_32:
+    case MVE_VST40_32_wb:
+    case MVE_VST41_32:
+    case MVE_VST41_32_wb:
+    case MVE_VST42_32:
+    case MVE_VST42_32_wb:
+    case MVE_VST43_32:
+    case MVE_VST43_32_wb:
+    case MVE_VSTRB32:
+    case MVE_VSTRB32_post:
+    case MVE_VSTRB32_pre:
+    case MVE_VSTRB32_rq:
+    case MVE_VSTRH32:
+    case MVE_VSTRH32_post:
+    case MVE_VSTRH32_pre:
+    case MVE_VSTRH32_rq:
+    case MVE_VSTRH32_rq_u:
+    case MVE_VSTRW32_qi:
+    case MVE_VSTRW32_qi_pre:
+    case MVE_VSTRW32_rq:
+    case MVE_VSTRW32_rq_u:
+    case MVE_VSTRWU32:
+    case MVE_VSTRWU32_post:
+    case MVE_VSTRWU32_pre:
+    case MVE_VSUB_qr_f32:
+    case MVE_VSUB_qr_i32:
+    case MVE_VSUBf32:
+    case MVE_VSUBi32:
+      return 2;
+    case MVE_VCTP64:
+    case MVE_VLDRDU64_qi:
+    case MVE_VLDRDU64_qi_pre:
+    case MVE_VLDRDU64_rq:
+    case MVE_VLDRDU64_rq_u:
+    case MVE_VMULLBs32:
+    case MVE_VMULLBu32:
+    case MVE_VMULLTs32:
+    case MVE_VMULLTu32:
+    case MVE_VMOVimmi64:
+    case MVE_VREV64_16:
+    case MVE_VREV64_32:
+    case MVE_VREV64_8:
+    case MVE_VSTRD64_qi:
+    case MVE_VSTRD64_qi_pre:
+    case MVE_VSTRD64_rq:
+    case MVE_VSTRD64_rq_u:
+      return 3;
+    }
+  };
+  LLVMInitializeARMTargetInfo();
+  LLVMInitializeARMTarget();
+  LLVMInitializeARMTargetMC();
+
+  auto TT(Triple::normalize("thumbv8.1m.main-none-none-eabi"));
+  std::string Error;
+  const Target *T = TargetRegistry::lookupTarget(TT, Error);
+  if (!T) {
+    dbgs() << Error;
+    return;
+  }
+
+  TargetOptions Options;
+  auto TM = std::unique_ptr<LLVMTargetMachine>(
+      static_cast<LLVMTargetMachine *>(
+          T->createTargetMachine(TT, "generic", "", Options, None, None,
+                                 CodeGenOpt::Default)));
+  ARMSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
+                  std::string(TM->getTargetFeatureString()),
+                  *static_cast<const ARMBaseTargetMachine *>(TM.get()), false);
+
+  auto MII = TM->getMCInstrInfo();
+  for (unsigned i = 0; i < ARM::INSTRUCTION_LIST_END; ++i) {
+    uint64_t Flags = MII->get(i).TSFlags;
+    if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE)
+      continue;
+    int Size = (Flags & ARMII::VecSize) >> ARMII::VecSizeShift;
+    ASSERT_EQ(MVEVecSize(i), Size)
+        << MII->getName(i)
+        << ": mismatched expectation for MVE vec size\n";
+  }
+}
\ No newline at end of file
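
Editor's note (not part of the patch): the test reads the new field back with the same mask-and-shift idiom any backend consumer would use. As a minimal sketch, assuming only the ARMII::VecSize mask and ARMII::VecSizeShift constants exercised by the test above, a consumer could map the two-bit field to a lane width in bits; the helper name mveElementBits is hypothetical, not an LLVM API:

    // Hypothetical helper: decode the two-bit VecSize TSFlags field into the
    // MVE element width in bits (0b00->8, 0b01->16, 0b10->32, 0b11->64),
    // matching the 0/1/2/3 expectations encoded in the test's switch.
    static unsigned mveElementBits(const llvm::MCInstrDesc &Desc) {
      uint64_t VecSize =
          (Desc.TSFlags & llvm::ARMII::VecSize) >> llvm::ARMII::VecSizeShift;
      return 8u << VecSize;
    }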