diff --git a/llvm/lib/Target/ARM/ARMInstrFormats.td b/llvm/lib/Target/ARM/ARMInstrFormats.td
--- a/llvm/lib/Target/ARM/ARMInstrFormats.td
+++ b/llvm/lib/Target/ARM/ARMInstrFormats.td
@@ -408,7 +408,7 @@
   // mnemonic (when not in an IT block) or preclude it (when in an IT block).
   bit thumbArithFlagSetting = 0;
 
-  bit invalidForTailPredication = 0;
+  bit validForTailPredication = 0;
 
   // If this is a pseudo instruction, mark it isCodeGenOnly.
   let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
@@ -421,7 +421,7 @@
   let TSFlags{14} = canXformTo16Bit;
   let TSFlags{18-15} = D.Value;
   let TSFlags{19} = thumbArithFlagSetting;
-  let TSFlags{20} = invalidForTailPredication;
+  let TSFlags{20} = validForTailPredication;
 
   let Constraints = cstr;
   let Itinerary = itin;
diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -507,7 +507,6 @@
   let Inst{5} = Qm{3};
   let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b1;
-  let invalidForTailPredication = 1;
 }
 
 def MVE_VABAVs8 : MVE_VABAV<"s8", 0b0, 0b00>;
@@ -534,7 +533,6 @@
   let Inst{5} = A;
   let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b0;
-  let invalidForTailPredication = 1;
 }
 
 multiclass MVE_VADDV_A size,
@@ -585,7 +583,6 @@
   let Inst{5} = A;
   let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b0;
-  let invalidForTailPredication = 1;
 }
 
 multiclass MVE_VADDLV_A pattern=[]> {
@@ -623,7 +620,6 @@
   let Inst{0} = 0b0;
 
   let Predicates = [HasMVEFloat];
-  let invalidForTailPredication = 1;
 }
 
 multiclass MVE_VMINMAXNMV_fty pattern=[]> {
@@ -660,7 +656,6 @@
   let Inst{6-5} = 0b00;
   let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b0;
-  let invalidForTailPredication = 1;
 }
 
 multiclass MVE_VMINMAXV_ty pattern=[]> {
@@ -733,7 +728,6 @@
   let Inst{5} = A;
   let Inst{3-1} = Qm{2-0};
   let Inst{0} = bit_0;
-  let invalidForTailPredication = 1;
 }
 
 multiclass MVE_VMLAMLSDAV_A size, bits<2> bit_8_7, string cstr="">
@@ -1104,6 +1098,7 @@
   let Inst{12-6} = 0b0010111;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
 let Predicates = [HasMVEInt] in {
@@ -1132,6 +1127,7 @@
   let Inst{6} = 0b1;
   let Inst{4} = 0b1;
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
 def MVE_VEOR : MVE_bit_ops<"veor", 0b00, 0b1>;
@@ -1223,6 +1219,7 @@
 class MVE_VORR cmode, ExpandImm imm_type>
   : MVE_bit_cmode<"vorr", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
   let Inst{5} = 0b0;
+  let validForTailPredication = 1;
 }
 
 def MVE_VORRIZ0v4i32 : MVE_VORR<"i32", 0b0001, expzero00>;
@@ -1251,6 +1248,7 @@
 class MVE_VBIC cmode, ExpandImm imm_type>
   : MVE_bit_cmode<"vbic", suffix, cmode, (ins MQPR:$Qd_src, imm_type:$imm)> {
   let Inst{5} = 0b1;
+  let validForTailPredication = 1;
 }
 
 def MVE_VBICIZ0v4i32 : MVE_VBIC<"i32", 0b0001, expzero00>;
@@ -1490,6 +1488,7 @@
   let Inst{12-8} = 0b01000;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
 class MVE_VADD size, list pattern=[]>
@@ -1535,6 +1534,7 @@
   let Inst{8} = 0b0;
   let Inst{4} = 0b1;
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 
   ValueType VT = vt;
 }
@@ -1587,6 +1587,7 @@
   let Inst{12-8} = 0b00111;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
 def MVE_VABDs8 : MVE_VABD_int<"s8", 0b0, 0b00>;
@@ -1605,6 +1606,7 @@
   let Inst{12-8} = 0b00001;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
 def MVE_VRHADDs8 : MVE_VRHADD<"s8", 0b0, 0b00>;
@@ -1626,6 +1628,7 @@
   let Inst{8} = 0b0;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
 class MVE_VHADD size,
@@ -1721,6 +1724,7 @@
   let Inst{6} = 0b0;
   let Inst{5} = E;
   let Inst{4-0} = 0b10000;
+  let validForTailPredication = 1;
 }
 
 def MVE_VDUP32 : MVE_VDUP<"32", 0b0, 0b0>;
@@ -1783,6 +1787,7 @@
   let Inst{6} = 0b1;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
 def MVE_VCLSs8 : MVE_VCLSCLZ<"vcls", "s8", 0b00, 0b0>;
@@ -1815,6 +1820,7 @@
   let Inst{6} = 0b1;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
 def MVE_VABSs8 : MVE_VABSNEG_int<"vabs", "s8", 0b00, 0b0>;
@@ -1856,6 +1862,7 @@
   let Inst{6} = 0b1;
   let Inst{4} = 0b0;
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
 def MVE_VQABSs8 : MVE_VQABSNEG<"vqabs", "s8", 0b00, 0b0>;
@@ -1887,6 +1894,7 @@
   let Inst{3-0} = imm{3-0};
 
   let DecoderMethod = "DecodeMVEModImmInstruction";
+  let validForTailPredication = 1;
 }
 
 let isReMaterializable = 1 in {
@@ -2282,6 +2290,7 @@
   let Inst{4} = bit_4;
   let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
 multiclass mve_shift_by_vec_multi {
@@ -2330,6 +2339,7 @@
   let Inst{4} = 0b1;
   let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
 class MVE_VSxI_imm
@@ -2342,6 +2352,7 @@
   let Inst{21-16} = imm;
   let Inst{10-9} = 0b10;
   let Inst{8} = bit_8;
+  let validForTailPredication = 1;
 }
 
 def MVE_VSRIimm8 : MVE_VSxI_imm<"vsri", "8", 0b0, (ins shr_imm8:$imm)> {
@@ -2594,6 +2605,7 @@
   let Inst{11-10} = 0b01;
   let Inst{9-7} = op{2-0};
   let Inst{4} = 0b0;
+  let validForTailPredication = 1;
 }
 
@@ -2656,6 +2668,7 @@
   let Inst{12-8} = 0b01101;
   let Inst{7} = Qn{3};
   let Inst{4} = 0b1;
+  let validForTailPredication = 1;
 }
 
 def MVE_VMULf32 : MVE_VMUL_fp<"f32", 0b0>;
@@ -2751,8 +2764,10 @@
 }
 
-def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>;
-def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>;
+let validForTailPredication = 1 in {
+  def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>;
+  def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>;
+}
 
 let Predicates = [HasMVEFloat] in {
   def : Pat<(v4f32 (fadd (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
@@ -2761,8 +2776,11 @@
             (v8f16 (MVE_VADDf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
 }
 
-def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>;
-def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>;
+
+let validForTailPredication = 1 in {
+  def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>;
+  def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>;
+}
 
 let Predicates = [HasMVEFloat] in {
   def : Pat<(v4f32 (fsub (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
@@ -2812,6 +2830,7 @@
   let Inst{11-8} = 0b1101;
   let Inst{7} = Qn{3};
   let Inst{4} = 0b0;
+  let validForTailPredication = 1;
 }
 
 def MVE_VABDf32 : MVE_VABD_fp<"f32", 0b0>;
@@ -2838,6 +2857,7 @@
   let Inst{4} = 0b1;
 
   let DecoderMethod = "DecodeMVEVCVTt1fp";
+  let validForTailPredication = 1;
 }
 
 class MVE_VCVT_imm_asmop : AsmOperandClass {
@@ -2888,6 +2908,7 @@
   let Inst{9-8} = rm;
   let Inst{7} = op;
   let Inst{4} = 0b0;
+  let validForTailPredication = 1;
 }
 
 multiclass MVE_VCVT_fp_int_anpm_multi size, bit op,
@@ -2922,6 +2943,7 @@
   let Inst{12-9} = 0b0011;
   let Inst{8-7} = op;
   let Inst{4} = 0b0;
+  let validForTailPredication = 1;
 }
 
 // The unsuffixed VCVT for float->int implicitly rounds toward zero,
@@ -2971,6 +2993,7 @@
   let Inst{11-8} = 0b0111;
   let Inst{7} = negate;
   let Inst{4} = 0b0;
+  let validForTailPredication = 1;
 }
 
 def MVE_VABSf16 : MVE_VABSNEG_fp<"vabs", "f16", 0b01, 0b0>;
@@ -3058,6 +3081,7 @@
   // decoder to emit an operand that isn't affected by any instruction
   // bit.
   let DecoderMethod = "DecodeMVEVCMP";
+  let validForTailPredication = 1;
 }
 
 class MVE_VCMPqqf
@@ -3122,6 +3146,7 @@
   let Constraints = "";
   // Custom decoder method, for the same reason as MVE_VCMPqq
   let DecoderMethod = "DecodeMVEVCMP";
+  let validForTailPredication = 1;
 }
 
 class MVE_VCMPqrf
@@ -3667,6 +3692,7 @@
   let Inst{12} = bit_12;
   let Inst{8} = 0b1;
   let Inst{5} = bit_5;
+  let validForTailPredication = 1;
 }
 
 multiclass MVE_VADDSUB_qr_sizes;
@@ -3774,6 +3801,7 @@
   let Inst{12-8} = 0b11110;
   let Inst{7} = bit_7;
   let Inst{6-4} = 0b110;
+  let validForTailPredication = 1;
 }
 
 multiclass MVE_VxSHL_qr_types {
@@ -3815,6 +3843,7 @@
   let Inst{12} = 0b1;
   let Inst{8} = 0b0;
   let Inst{5} = 0b1;
+  let validForTailPredication = 1;
 }
 
 def MVE_VBRSR8 : MVE_VBRSR<"vbrsr", "8", 0b00>;
@@ -3842,6 +3871,7 @@
   let Inst{12} = 0b1;
   let Inst{8} = 0b0;
   let Inst{5} = 0b1;
+  let validForTailPredication = 1;
 }
 
 def MVE_VMUL_qr_i8 : MVE_VMUL_qr_int<"vmul", "i8", 0b00>;
@@ -3877,7 +3907,7 @@
 def MVE_VQRDMULH_qr_s16 : MVE_VxxMUL_qr<"vqrdmulh", "s16", 0b1, 0b01>;
 def MVE_VQRDMULH_qr_s32 : MVE_VxxMUL_qr<"vqrdmulh", "s32", 0b1, 0b10>;
 
-let Predicates = [HasMVEFloat] in {
+let Predicates = [HasMVEFloat], validForTailPredication = 1 in {
   def MVE_VMUL_qr_f16 : MVE_VxxMUL_qr<"vmul", "f16", 0b1, 0b11>;
   def MVE_VMUL_qr_f32 : MVE_VxxMUL_qr<"vmul", "f32", 0b0, 0b11>;
 }
@@ -3893,6 +3923,7 @@
   let Inst{12} = S;
   let Inst{8} = 0b0;
   let Inst{5} = 0b0;
+  let validForTailPredication = 1;
 }
 
 def MVE_VMLA_qr_s8 : MVE_VFMAMLA_qr<"vmla", "s8", 0b0, 0b00, 0b0>;
@@ -3976,6 +4007,7 @@
   let Inst{7} = imm{1};
   let Inst{6-1} = 0b110111;
   let Inst{0} = imm{0};
+  let validForTailPredication = 1;
 }
 
 def MVE_VIDUPu8 : MVE_VxDUP<"vidup", "u8", 0b00, 0b0>;
@@ -4010,6 +4042,7 @@
   let Inst{6-4} = 0b110;
   let Inst{3-1} = Rm{3-1};
   let Inst{0} = imm{0};
+  let validForTailPredication = 1;
 }
 
 def MVE_VIWDUPu8 : MVE_VxWDUP<"viwdup", "u8", 0b00, 0b0>;
@@ -4036,6 +4069,7 @@
   let Constraints = "";
   let DecoderMethod = "DecodeMveVCTP";
+  let validForTailPredication = 1;
 }
 
 def MVE_VCTP8 : MVE_VCTP<"8", 0b00>;
@@ -4294,6 +4328,7 @@
   let mayLoad = dir.load;
   let mayStore = !eq(dir.load,0);
+  let validForTailPredication = 1;
 }
 
 // Contiguous load and store instructions. These come in two main
@@ -4597,6 +4632,7 @@
   let Inst{4} = 0b0;
 
   let Defs = [VPR];
+  let validForTailPredication = 1;
 }
 
 class MVE_VPTt1 size, dag iops>
@@ -4608,6 +4644,7 @@
   let Inst{5} = Qm{3};
   let Inst{3-1} = Qm{2-0};
   let Inst{0} = fc{1};
+  let validForTailPredication = 1;
 }
 
 class MVE_VPTt1i size>
@@ -4709,6 +4746,7 @@
   let Defs = [VPR];
   let Predicates = [HasMVEFloat];
+  let validForTailPredication = 1;
 }
 
 class MVE_VPTft1
@@ -4754,6 +4792,7 @@
   let Unpredictable{5} = 0b1;
 
   let Defs = [VPR];
+  let validForTailPredication = 1;
 }
 
 def MVE_VPSEL : MVE_p<(outs MQPR:$Qd), (ins MQPR:$Qn, MQPR:$Qm), NoItinerary,
@@ -4777,6 +4816,7 @@
   let Inst{4} = 0b0;
   let Inst{3-1} = Qm{2-0};
   let Inst{0} = 0b1;
+  let validForTailPredication = 1;
 }
 
 foreach suffix = ["s8", "s16", "s32", "u8", "u16", "u32",
diff --git a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
--- a/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
+++ b/llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h
@@ -393,9 +393,8 @@
   // in an IT block).
   ThumbArithFlagSetting = 1 << 19,
 
-  // Whether an instruction should be excluded from an MVE tail-predicated
-  // loop.
-  InvalidForTailPredication = 1 << 20,
+  // Whether an instruction can be included in an MVE tail-predicated loop.
+  ValidForTailPredication = 1 << 20,
 
   //===------------------------------------------------------------------===//
   // Code domain.
diff --git a/llvm/unittests/Target/ARM/CMakeLists.txt b/llvm/unittests/Target/ARM/CMakeLists.txt
--- a/llvm/unittests/Target/ARM/CMakeLists.txt
+++ b/llvm/unittests/Target/ARM/CMakeLists.txt
@@ -1,13 +1,15 @@
 include_directories(
-  ${CMAKE_SOURCE_DIR}/lib/Target/ARM
-  ${CMAKE_BINARY_DIR}/lib/Target/ARM
+  ${LLVM_MAIN_SRC_DIR}/lib/Target/ARM
+  ${LLVM_BINARY_DIR}/lib/Target/ARM
   )
 
 set(LLVM_LINK_COMPONENTS
   ARMCodeGen
   ARMDesc
   ARMInfo
+  CodeGen
   MC
+  SelectionDAG
   Support
   Target
   )
diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp
--- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp
+++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp
@@ -12,13 +12,458 @@
 // Test for instructions that aren't immediately obviously valid within a
 // tail-predicated loop. This should be marked up in their tablegen
-// descriptions. Currently the horizontal vector operations are tagged.
-// TODO Add instructions that perform:
-// - truncation,
-// - extensions,
-// - byte swapping,
-// - others?
-TEST(MachineInstrInvalidTailPredication, IsCorrect) {
+// descriptions. Currently we conservatively disallow:
+// - cross-beat carries,
+// - narrowing of results,
+// - top/bottom operations,
+// - complex operations,
+// - horizontal operations,
+// - byte swapping,
+// - interleaved memory instructions.
+// TODO: Add to this list once we can handle them safely.
+TEST(MachineInstrValidTailPredication, IsCorrect) {
+
+  using namespace ARM;
+
+  auto IsValidTPOpcode = [](unsigned Opcode) {
+    switch (Opcode) {
+    default:
+      return false;
+    case MVE_ASRLi:
+    case MVE_ASRLr:
+    case MVE_LSRL:
+    case MVE_SQRSHR:
+    case MVE_SQSHL:
+    case MVE_SRSHR:
+    case MVE_UQRSHL:
+    case MVE_UQSHL:
+    case MVE_URSHR:
+    case MVE_VABDf16:
+    case MVE_VABDf32:
+    case MVE_VABDs16:
+    case MVE_VABDs32:
+    case MVE_VABDs8:
+    case MVE_VABDu16:
+    case MVE_VABDu32:
+    case MVE_VABDu8:
+    case MVE_VABSf16:
+    case MVE_VABSf32:
+    case MVE_VABSs16:
+    case MVE_VABSs32:
+    case MVE_VABSs8:
+    case MVE_VADD_qr_f16:
+    case MVE_VADD_qr_f32:
+    case MVE_VADD_qr_i16:
+    case MVE_VADD_qr_i32:
+    case MVE_VADD_qr_i8:
+    case MVE_VADDf16:
+    case MVE_VADDf32:
+    case MVE_VADDi16:
+    case MVE_VADDi32:
+    case MVE_VADDi8:
+    case MVE_VAND:
+    case MVE_VBIC:
+    case MVE_VBICIZ0v4i32:
+    case MVE_VBICIZ0v8i16:
+    case MVE_VBICIZ16v4i32:
+    case MVE_VBICIZ24v4i32:
+    case MVE_VBICIZ8v4i32:
+    case MVE_VBICIZ8v8i16:
+    case MVE_VBRSR16:
+    case MVE_VBRSR32:
+    case MVE_VBRSR8:
+    case MVE_VCLSs16:
+    case MVE_VCLSs32:
+    case MVE_VCLSs8:
+    case MVE_VCLZs16:
+    case MVE_VCLZs32:
+    case MVE_VCLZs8:
+    case MVE_VCMPf16:
+    case MVE_VCMPf16r:
+    case MVE_VCMPf32:
+    case MVE_VCMPf32r:
+    case MVE_VCMPi16:
+    case MVE_VCMPi16r:
+    case MVE_VCMPi32:
+    case MVE_VCMPi32r:
+    case MVE_VCMPi8:
+    case MVE_VCMPi8r:
+    case MVE_VCMPs16:
+    case MVE_VCMPs16r:
+    case MVE_VCMPs32:
+    case MVE_VCMPs32r:
+    case MVE_VCMPs8:
+    case MVE_VCMPs8r:
+    case MVE_VCMPu16:
+    case MVE_VCMPu16r:
+    case MVE_VCMPu32:
+    case MVE_VCMPu32r:
+    case MVE_VCMPu8:
+    case MVE_VCMPu8r:
+    case MVE_VCTP16:
+    case MVE_VCTP32:
+    case MVE_VCTP64:
+    case MVE_VCTP8:
+    case MVE_VCVTf16s16_fix:
+    case MVE_VCVTf16s16n:
+    case MVE_VCVTf16u16_fix:
+    case MVE_VCVTf16u16n:
+    case MVE_VCVTf32s32_fix:
+    case MVE_VCVTf32s32n:
+    case MVE_VCVTf32u32_fix:
+    case MVE_VCVTf32u32n:
+    case MVE_VCVTs16f16_fix:
+    case MVE_VCVTs16f16a:
+    case MVE_VCVTs16f16m:
+    case MVE_VCVTs16f16n:
+    case MVE_VCVTs16f16p:
+    case MVE_VCVTs16f16z:
+    case MVE_VCVTs32f32_fix:
+    case MVE_VCVTs32f32a:
+    case MVE_VCVTs32f32m:
+    case MVE_VCVTs32f32n:
+    case MVE_VCVTs32f32p:
+    case MVE_VCVTs32f32z:
+    case MVE_VCVTu16f16_fix:
+    case MVE_VCVTu16f16a:
+    case MVE_VCVTu16f16m:
+    case MVE_VCVTu16f16n:
+    case MVE_VCVTu16f16p:
+    case MVE_VCVTu16f16z:
+    case MVE_VCVTu32f32_fix:
+    case MVE_VCVTu32f32a:
+    case MVE_VCVTu32f32m:
+    case MVE_VCVTu32f32n:
+    case MVE_VCVTu32f32p:
+    case MVE_VCVTu32f32z:
+    case MVE_VDDUPu16:
+    case MVE_VDDUPu32:
+    case MVE_VDDUPu8:
+    case MVE_VDUP16:
+    case MVE_VDUP32:
+    case MVE_VDUP8:
+    case MVE_VDWDUPu16:
+    case MVE_VDWDUPu32:
+    case MVE_VDWDUPu8:
+    case MVE_VEOR:
+    case MVE_VFMA_qr_Sf16:
+    case MVE_VFMA_qr_Sf32:
+    case MVE_VFMA_qr_f16:
+    case MVE_VFMA_qr_f32:
+    case MVE_VMLAS_qr_s16:
+    case MVE_VMLAS_qr_s32:
+    case MVE_VMLAS_qr_s8:
+    case MVE_VMLAS_qr_u16:
+    case MVE_VMLAS_qr_u32:
+    case MVE_VMLAS_qr_u8:
+    case MVE_VMLA_qr_s16:
+    case MVE_VMLA_qr_s32:
+    case MVE_VMLA_qr_s8:
+    case MVE_VMLA_qr_u16:
+    case MVE_VMLA_qr_u32:
+    case MVE_VMLA_qr_u8:
+    case MVE_VHADD_qr_s16:
+    case MVE_VHADD_qr_s32:
+    case MVE_VHADD_qr_s8:
+    case MVE_VHADD_qr_u16:
+    case MVE_VHADD_qr_u32:
+    case MVE_VHADD_qr_u8:
+    case MVE_VHADDs16:
+    case MVE_VHADDs32:
+    case MVE_VHADDs8:
+    case MVE_VHADDu16:
+    case MVE_VHADDu32:
+    case MVE_VHADDu8:
+    case MVE_VHSUB_qr_s16:
+    case MVE_VHSUB_qr_s32:
+    case MVE_VHSUB_qr_s8:
+    case MVE_VHSUB_qr_u16:
+    case MVE_VHSUB_qr_u32:
+    case MVE_VHSUB_qr_u8:
+    case MVE_VHSUBs16:
+    case MVE_VHSUBs32:
+    case MVE_VHSUBs8:
+    case MVE_VHSUBu16:
+    case MVE_VHSUBu32:
+    case MVE_VHSUBu8:
+    case MVE_VIDUPu16:
+    case MVE_VIDUPu32:
+    case MVE_VIDUPu8:
+    case MVE_VIWDUPu16:
+    case MVE_VIWDUPu32:
+    case MVE_VIWDUPu8:
+    case MVE_VLDRBS16:
+    case MVE_VLDRBS16_post:
+    case MVE_VLDRBS16_pre:
+    case MVE_VLDRBS16_rq:
+    case MVE_VLDRBS32:
+    case MVE_VLDRBS32_post:
+    case MVE_VLDRBS32_pre:
+    case MVE_VLDRBS32_rq:
+    case MVE_VLDRBU16:
+    case MVE_VLDRBU16_post:
+    case MVE_VLDRBU16_pre:
+    case MVE_VLDRBU16_rq:
+    case MVE_VLDRBU32:
+    case MVE_VLDRBU32_post:
+    case MVE_VLDRBU32_pre:
+    case MVE_VLDRBU32_rq:
+    case MVE_VLDRBU8:
+    case MVE_VLDRBU8_post:
+    case MVE_VLDRBU8_pre:
+    case MVE_VLDRBU8_rq:
+    case MVE_VLDRDU64_qi:
+    case MVE_VLDRDU64_qi_pre:
+    case MVE_VLDRDU64_rq:
+    case MVE_VLDRDU64_rq_u:
+    case MVE_VLDRHS32:
+    case MVE_VLDRHS32_post:
+    case MVE_VLDRHS32_pre:
+    case MVE_VLDRHS32_rq:
+    case MVE_VLDRHS32_rq_u:
+    case MVE_VLDRHU16:
+    case MVE_VLDRHU16_post:
+    case MVE_VLDRHU16_pre:
+    case MVE_VLDRHU16_rq:
+    case MVE_VLDRHU16_rq_u:
+    case MVE_VLDRHU32:
+    case MVE_VLDRHU32_post:
+    case MVE_VLDRHU32_pre:
+    case MVE_VLDRHU32_rq:
+    case MVE_VLDRHU32_rq_u:
+    case MVE_VLDRWU32:
+    case MVE_VLDRWU32_post:
+    case MVE_VLDRWU32_pre:
+    case MVE_VLDRWU32_qi:
+    case MVE_VLDRWU32_qi_pre:
+    case MVE_VLDRWU32_rq:
+    case MVE_VLDRWU32_rq_u:
+    case MVE_VMOVimmf32:
+    case MVE_VMOVimmi16:
+    case MVE_VMOVimmi32:
+    case MVE_VMOVimmi64:
+    case MVE_VMOVimmi8:
+    case MVE_VMUL_qr_f16:
+    case MVE_VMUL_qr_f32:
+    case MVE_VMUL_qr_i16:
+    case MVE_VMUL_qr_i32:
+    case MVE_VMUL_qr_i8:
+    case MVE_VMULf16:
+    case MVE_VMULf32:
+    case MVE_VMVN:
+    case MVE_VMVNimmi16:
+    case MVE_VMVNimmi32:
+    case MVE_VNEGf16:
+    case MVE_VNEGf32:
+    case MVE_VNEGs16:
+    case MVE_VNEGs32:
+    case MVE_VNEGs8:
+    case MVE_VORN:
+    case MVE_VORR:
+    case MVE_VORRIZ0v4i32:
+    case MVE_VORRIZ0v8i16:
+    case MVE_VORRIZ16v4i32:
+    case MVE_VORRIZ24v4i32:
+    case MVE_VORRIZ8v4i32:
+    case MVE_VORRIZ8v8i16:
+    case MVE_VPNOT:
+    case MVE_VPSEL:
+    case MVE_VPST:
+    case MVE_VPTv16i8:
+    case MVE_VPTv16i8r:
+    case MVE_VPTv16s8:
+    case MVE_VPTv16s8r:
+    case MVE_VPTv16u8:
+    case MVE_VPTv16u8r:
+    case MVE_VPTv4f32:
+    case MVE_VPTv4f32r:
+    case MVE_VPTv4i32:
+    case MVE_VPTv4i32r:
+    case MVE_VPTv4s32:
+    case MVE_VPTv4s32r:
+    case MVE_VPTv4u32:
+    case MVE_VPTv4u32r:
+    case MVE_VPTv8f16:
+    case MVE_VPTv8f16r:
+    case MVE_VPTv8i16:
+    case MVE_VPTv8i16r:
+    case MVE_VPTv8s16:
+    case MVE_VPTv8s16r:
+    case MVE_VPTv8u16:
+    case MVE_VPTv8u16r:
+    case MVE_VQABSs16:
+    case MVE_VQABSs32:
+    case MVE_VQABSs8:
+    case MVE_VQADD_qr_s16:
+    case MVE_VQADD_qr_s32:
+    case MVE_VQADD_qr_s8:
+    case MVE_VQADD_qr_u16:
+    case MVE_VQADD_qr_u32:
+    case MVE_VQADD_qr_u8:
+    case MVE_VQADDs16:
+    case MVE_VQADDs32:
+    case MVE_VQADDs8:
+    case MVE_VQADDu16:
+    case MVE_VQADDu32:
+    case MVE_VQADDu8:
+    case MVE_VQNEGs16:
+    case MVE_VQNEGs32:
+    case MVE_VQNEGs8:
+    case MVE_VQRSHL_by_vecs16:
+    case MVE_VQRSHL_by_vecs32:
+    case MVE_VQRSHL_by_vecs8:
+    case MVE_VQRSHL_by_vecu16:
+    case MVE_VQRSHL_by_vecu32:
+    case MVE_VQRSHL_by_vecu8:
+    case MVE_VQRSHL_qrs16:
+    case MVE_VQRSHL_qrs32:
+    case MVE_VQRSHL_qrs8:
+    case MVE_VQRSHL_qru16:
+    case MVE_VQRSHL_qru8:
+    case MVE_VQRSHL_qru32:
+    case MVE_VQSHLU_imms16:
+    case MVE_VQSHLU_imms32:
+    case MVE_VQSHLU_imms8:
+    case MVE_VQSHL_by_vecs16:
+    case MVE_VQSHL_by_vecs32:
+    case MVE_VQSHL_by_vecs8:
+    case MVE_VQSHL_by_vecu16:
+    case MVE_VQSHL_by_vecu32:
+    case MVE_VQSHL_by_vecu8:
+    case MVE_VQSHL_qrs16:
+    case MVE_VQSHL_qrs32:
+    case MVE_VQSHL_qrs8:
+    case MVE_VQSHL_qru16:
+    case MVE_VQSHL_qru32:
+    case MVE_VQSHL_qru8:
+    case MVE_VQSUB_qr_s16:
+    case MVE_VQSUB_qr_s32:
+    case MVE_VQSUB_qr_s8:
+    case MVE_VQSUB_qr_u16:
+    case MVE_VQSUB_qr_u32:
+    case MVE_VQSUB_qr_u8:
+    case MVE_VQSUBs16:
+    case MVE_VQSUBs32:
+    case MVE_VQSUBs8:
+    case MVE_VQSUBu16:
+    case MVE_VQSUBu32:
+    case MVE_VQSUBu8:
+    case MVE_VRHADDs16:
+    case MVE_VRHADDs32:
+    case MVE_VRHADDs8:
+    case MVE_VRHADDu16:
+    case MVE_VRHADDu32:
+    case MVE_VRHADDu8:
+    case MVE_VRINTf16A:
+    case MVE_VRINTf16M:
+    case MVE_VRINTf16N:
+    case MVE_VRINTf16P:
+    case MVE_VRINTf16X:
+    case MVE_VRINTf16Z:
+    case MVE_VRINTf32A:
+    case MVE_VRINTf32M:
+    case MVE_VRINTf32N:
+    case MVE_VRINTf32P:
+    case MVE_VRINTf32X:
+    case MVE_VRINTf32Z:
+    case MVE_VRSHL_by_vecs16:
+    case MVE_VRSHL_by_vecs32:
+    case MVE_VRSHL_by_vecs8:
+    case MVE_VRSHL_by_vecu16:
+    case MVE_VRSHL_by_vecu32:
+    case MVE_VRSHL_by_vecu8:
+    case MVE_VRSHL_qrs16:
+    case MVE_VRSHL_qrs32:
+    case MVE_VRSHL_qrs8:
+    case MVE_VRSHL_qru16:
+    case MVE_VRSHL_qru32:
+    case MVE_VRSHL_qru8:
+    case MVE_VRSHR_imms16:
+    case MVE_VRSHR_imms32:
+    case MVE_VRSHR_imms8:
+    case MVE_VRSHR_immu16:
+    case MVE_VRSHR_immu32:
+    case MVE_VRSHR_immu8:
+    case MVE_VSHL_by_vecs16:
+    case MVE_VSHL_by_vecs32:
+    case MVE_VSHL_by_vecs8:
+    case MVE_VSHL_by_vecu16:
+    case MVE_VSHL_by_vecu32:
+    case MVE_VSHL_by_vecu8:
+    case MVE_VSHL_immi16:
+    case MVE_VSHL_immi32:
+    case MVE_VSHL_immi8:
+    case MVE_VSHL_qrs16:
+    case MVE_VSHL_qrs32:
+    case MVE_VSHL_qrs8:
+    case MVE_VSHL_qru16:
+    case MVE_VSHL_qru32:
+    case MVE_VSHL_qru8:
+    case MVE_VSHR_imms16:
+    case MVE_VSHR_imms32:
+    case MVE_VSHR_imms8:
+    case MVE_VSHR_immu16:
+    case MVE_VSHR_immu32:
+    case MVE_VSHR_immu8:
+    case MVE_VSLIimm16:
+    case MVE_VSLIimm32:
+    case MVE_VSLIimm8:
+    case MVE_VSLIimms16:
+    case MVE_VSLIimms32:
+    case MVE_VSLIimms8:
+    case MVE_VSLIimmu16:
+    case MVE_VSLIimmu32:
+    case MVE_VSLIimmu8:
+    case MVE_VSRIimm16:
+    case MVE_VSRIimm32:
+    case MVE_VSRIimm8:
+    case MVE_VSTRB16:
+    case MVE_VSTRB16_post:
+    case MVE_VSTRB16_pre:
+    case MVE_VSTRB16_rq:
+    case MVE_VSTRB32:
+    case MVE_VSTRB32_post:
+    case MVE_VSTRB32_pre:
+    case MVE_VSTRB32_rq:
+    case MVE_VSTRB8_rq:
+    case MVE_VSTRBU8:
+    case MVE_VSTRBU8_post:
+    case MVE_VSTRBU8_pre:
+    case MVE_VSTRD64_qi:
+    case MVE_VSTRD64_qi_pre:
+    case MVE_VSTRD64_rq:
+    case MVE_VSTRD64_rq_u:
+    case MVE_VSTRH16_rq:
+    case MVE_VSTRH16_rq_u:
+    case MVE_VSTRH32:
+    case MVE_VSTRH32_post:
+    case MVE_VSTRH32_pre:
+    case MVE_VSTRH32_rq:
+    case MVE_VSTRH32_rq_u:
+    case MVE_VSTRHU16:
+    case MVE_VSTRHU16_post:
+    case MVE_VSTRHU16_pre:
+    case MVE_VSTRW32_qi:
+    case MVE_VSTRW32_qi_pre:
+    case MVE_VSTRW32_rq:
+    case MVE_VSTRW32_rq_u:
+    case MVE_VSTRWU32:
+    case MVE_VSTRWU32_post:
+    case MVE_VSTRWU32_pre:
+    case MVE_VSUB_qr_f16:
+    case MVE_VSUB_qr_f32:
+    case MVE_VSUB_qr_i16:
+    case MVE_VSUB_qr_i32:
+    case MVE_VSUB_qr_i8:
+    case MVE_VSUBf16:
+    case MVE_VSUBf32:
+    case MVE_VSUBi16:
+    case MVE_VSUBi32:
+    case MVE_VSUBi8:
+      return true;
+    }
+  };
+
   LLVMInitializeARMTargetInfo();
   LLVMInitializeARMTarget();
   LLVMInitializeARMTargetMC();
@@ -36,131 +481,27 @@
       static_cast<LLVMTargetMachine *>(
           T->createTargetMachine(TT, "generic", "", Options, None, None,
                                  CodeGenOpt::Default)));
+  ARMSubtarget ST(TM->getTargetTriple(), TM->getTargetCPU(),
+                  TM->getTargetFeatureString(),
+                  *static_cast<const ARMBaseTargetMachine *>(TM.get()), false);
+  const ARMBaseInstrInfo *TII = ST.getInstrInfo();
   auto MII = TM->getMCInstrInfo();
 
-  using namespace ARM;
+  for (unsigned i = 0; i < ARM::INSTRUCTION_LIST_END; ++i) {
+    const MCInstrDesc &Desc = TII->get(i);
 
-  auto IsInvalidTPOpcode = [](unsigned Opcode) {
-    switch (Opcode) {
-    case MVE_VABAVs8:
-    case MVE_VABAVs16:
-    case MVE_VABAVs32:
-    case MVE_VABAVu8:
-    case MVE_VABAVu16:
-    case MVE_VABAVu32:
-    case MVE_VADDVs8acc:
-    case MVE_VADDVs16acc:
-    case MVE_VADDVs32acc:
-    case MVE_VADDVu8acc:
-    case MVE_VADDVu16acc:
-    case MVE_VADDVu32acc:
-    case MVE_VADDVs8no_acc:
-    case MVE_VADDVs16no_acc:
-    case MVE_VADDVs32no_acc:
-    case MVE_VADDVu8no_acc:
-    case MVE_VADDVu16no_acc:
-    case MVE_VADDVu32no_acc:
-    case MVE_VADDLVs32no_acc:
-    case MVE_VADDLVu32no_acc:
-    case MVE_VADDLVs32acc:
-    case MVE_VADDLVu32acc:
-    case MVE_VMLADAVas16:
-    case MVE_VMLADAVas32:
-    case MVE_VMLADAVas8:
-    case MVE_VMLADAVau16:
-    case MVE_VMLADAVau32:
-    case MVE_VMLADAVau8:
-    case MVE_VMLADAVaxs16:
-    case MVE_VMLADAVaxs32:
-    case MVE_VMLADAVaxs8:
-    case MVE_VMLADAVs16:
-    case MVE_VMLADAVs32:
-    case MVE_VMLADAVs8:
-    case MVE_VMLADAVu16:
-    case MVE_VMLADAVu32:
-    case MVE_VMLADAVu8:
-    case MVE_VMLADAVxs16:
-    case MVE_VMLADAVxs32:
-    case MVE_VMLADAVxs8:
-    case MVE_VMLALDAVas16:
-    case MVE_VMLALDAVas32:
-    case MVE_VMLALDAVau16:
-    case MVE_VMLALDAVau32:
-    case MVE_VMLALDAVaxs16:
-    case MVE_VMLALDAVaxs32:
-    case MVE_VMLALDAVs16:
-    case MVE_VMLALDAVs32:
-    case MVE_VMLALDAVu16:
-    case MVE_VMLALDAVu32:
-    case MVE_VMLALDAVxs16:
-    case MVE_VMLALDAVxs32:
-    case MVE_VMLSDAVas16:
-    case MVE_VMLSDAVas32:
-    case MVE_VMLSDAVas8:
-    case MVE_VMLSDAVaxs16:
-    case MVE_VMLSDAVaxs32:
-    case MVE_VMLSDAVaxs8:
-    case MVE_VMLSDAVs16:
-    case MVE_VMLSDAVs32:
-    case MVE_VMLSDAVs8:
-    case MVE_VMLSDAVxs16:
-    case MVE_VMLSDAVxs32:
-    case MVE_VMLSDAVxs8:
-    case MVE_VMLSLDAVas16:
-    case MVE_VMLSLDAVas32:
-    case MVE_VMLSLDAVaxs16:
-    case MVE_VMLSLDAVaxs32:
-    case MVE_VMLSLDAVs16:
-    case MVE_VMLSLDAVs32:
-    case MVE_VMLSLDAVxs16:
-    case MVE_VMLSLDAVxs32:
-    case MVE_VRMLALDAVHas32:
-    case MVE_VRMLALDAVHau32:
-    case MVE_VRMLALDAVHaxs32:
-    case MVE_VRMLALDAVHs32:
-    case MVE_VRMLALDAVHu32:
-    case MVE_VRMLALDAVHxs32:
-    case MVE_VRMLSLDAVHas32:
-    case MVE_VRMLSLDAVHaxs32:
-    case MVE_VRMLSLDAVHs32:
-    case MVE_VRMLSLDAVHxs32:
-    case MVE_VMAXNMVf16:
-    case MVE_VMINNMVf16:
-    case MVE_VMAXNMVf32:
-    case MVE_VMINNMVf32:
-    case MVE_VMAXNMAVf16:
-    case MVE_VMINNMAVf16:
-    case MVE_VMAXNMAVf32:
-    case MVE_VMINNMAVf32:
-    case MVE_VMAXVs8:
-    case MVE_VMAXVs16:
-    case MVE_VMAXVs32:
-    case MVE_VMAXVu8:
-    case MVE_VMAXVu16:
-    case MVE_VMAXVu32:
-    case MVE_VMINVs8:
-    case MVE_VMINVs16:
-    case MVE_VMINVs32:
-    case MVE_VMINVu8:
-    case MVE_VMINVu16:
-    case MVE_VMINVu32:
-    case MVE_VMAXAVs8:
-    case MVE_VMAXAVs16:
-    case MVE_VMAXAVs32:
-    case MVE_VMINAVs8:
-    case MVE_VMINAVs16:
-    case MVE_VMINAVs32:
-      return true;
-    default:
-      return false;
-    }
-  };
+    for (auto &Op : Desc.operands()) {
+      // Only check instructions that access the MQPR regs.
+      if ((Op.OperandType & MCOI::OPERAND_REGISTER) == 0 ||
+          Op.RegClass != ARM::MQPRRegClassID)
+        continue;
 
-  for (unsigned i = 0; i < ARM::INSTRUCTION_LIST_END; ++i) {
-    uint64_t Flags = MII->get(i).TSFlags;
-    bool Invalid = (Flags & ARMII::InvalidForTailPredication) != 0;
-    ASSERT_EQ(IsInvalidTPOpcode(i), Invalid)
-        << MII->getName(i)
-        << ": mismatched expectation for tail-predicated safety\n";
+      uint64_t Flags = MII->get(i).TSFlags;
+      bool Valid = (Flags & ARMII::ValidForTailPredication) != 0;
+      ASSERT_EQ(IsValidTPOpcode(i), Valid)
+          << MII->getName(i)
+          << ": mismatched expectation for tail-predicated safety\n";
+      break;
+    }
   }
 }
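
A minimal sketch (not part of the patch) of how a consumer such as a
tail-predication pass might query the new TSFlags bit. The helper name
`mayTailPredicate` is hypothetical; only `ARMII::ValidForTailPredication`
and `MCInstrDesc::TSFlags` come from the change above:

  #include "MCTargetDesc/ARMBaseInfo.h"
  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstr.h"

  using namespace llvm;

  // Returns true only if every instruction in the block is explicitly
  // marked as safe to appear in an MVE tail-predicated loop. Because the
  // bit is opt-in (it defaults to 0 in ARMInstrFormats.td), any opcode
  // that has not been audited fails the check, which is the conservative
  // direction for the inverted flag.
  static bool mayTailPredicate(const MachineBasicBlock &MBB) {
    for (const MachineInstr &MI : MBB)
      if ((MI.getDesc().TSFlags & ARMII::ValidForTailPredication) == 0)
        return false;
    return true;
  }

Inverting the flag also means the unit test enumerates the smaller, audited
set of valid opcodes instead of chasing an ever-growing invalid set: a newly
added MVE instruction is rejected by default until it is whitelisted both in
tablegen and in IsValidTPOpcode.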