diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -3967,6 +3967,7 @@
   let Inst{8} = 0b0;
   let Inst{7} = Qn{3};
   let Inst{0} = 0b0;
+  let validForTailPredication = 1;
 }
 
 multiclass MVE_VMULL_m {
@@ -4382,6 +4384,7 @@
   let Inst{12} = T;
   let Inst{8} = 0b1;
   let Inst{5} = 0b1;
+  let validForTailPredication = 1;
 }
 
 multiclass MVE_VQDMULL_qr_halves {
diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp
--- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp
+++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp
@@ -15,7 +15,6 @@
 // descriptions. Currently we, conservatively, disallow:
 // - cross beat carries.
 // - narrowing of results.
-// - top/bottom operations.
 // - complex operations.
 // - horizontal operations.
 // - byte swapping.
@@ -243,6 +242,22 @@
     case MVE_VMOVimmi32:
     case MVE_VMOVimmi64:
     case MVE_VMOVimmi8:
+    case MVE_VMULLBp16:
+    case MVE_VMULLBp8:
+    case MVE_VMULLBs16:
+    case MVE_VMULLBs32:
+    case MVE_VMULLBs8:
+    case MVE_VMULLBu16:
+    case MVE_VMULLBu32:
+    case MVE_VMULLBu8:
+    case MVE_VMULLTp16:
+    case MVE_VMULLTp8:
+    case MVE_VMULLTs16:
+    case MVE_VMULLTs32:
+    case MVE_VMULLTs8:
+    case MVE_VMULLTu16:
+    case MVE_VMULLTu32:
+    case MVE_VMULLTu8:
     case MVE_VMUL_qr_f16:
     case MVE_VMUL_qr_f32:
     case MVE_VMUL_qr_i16:
@@ -287,6 +302,14 @@
     case MVE_VQADDu16:
     case MVE_VQADDu32:
     case MVE_VQADDu8:
+    case MVE_VQDMULL_qr_s16bh:
+    case MVE_VQDMULL_qr_s16th:
+    case MVE_VQDMULL_qr_s32bh:
+    case MVE_VQDMULL_qr_s32th:
+    case MVE_VQDMULLs16bh:
+    case MVE_VQDMULLs16th:
+    case MVE_VQDMULLs32bh:
+    case MVE_VQDMULLs32th:
     case MVE_VQNEGs16:
     case MVE_VQNEGs32:
     case MVE_VQNEGs8: