diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1918,6 +1918,7 @@ let Inst{12-8} = 0b01011; let Inst{4} = 0b0; let Inst{0} = 0b0; + let validForTailPredication = 1; } multiclass MVE_VQxDMULH_mThis Inner Loop Header: Depth=1 -; CHECK-NEXT: vctp.32 r12 -; CHECK-NEXT: vpst -; CHECK-NEXT: vldrwt.u32 q4, [r1], #16 +; CHECK-NEXT: vldrw.u32 q4, [r1], #16 ; CHECK-NEXT: vabs.s32 q5, q4 ; CHECK-NEXT: vcls.s32 q3, q5 ; CHECK-NEXT: vshl.u32 q5, q5, q3 @@ -41,15 +31,13 @@ ; CHECK-NEXT: vqshl.s32 q5, q5, #1 ; CHECK-NEXT: vpt.s32 lt, q4, zr ; CHECK-NEXT: vnegt.s32 q5, q5 -; CHECK-NEXT: vctp.32 r12 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q4, [r0], #16 ; CHECK-NEXT: vqrdmulh.s32 q4, q4, q5 ; CHECK-NEXT: vpstt ; CHECK-NEXT: vstrwt.32 q4, [r2], #16 ; CHECK-NEXT: vstrwt.32 q3, [r3], #16 -; CHECK-NEXT: le lr, .LBB0_1 +; CHECK-NEXT: letp lr, .LBB0_1 ; CHECK-NEXT: @ %bb.2: @ %bb44 ; CHECK-NEXT: vpop {d8, d9, d10, d11, d12, d13, d14, d15} ; CHECK-NEXT: pop {r4, r5, r7, pc} diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp --- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp +++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp @@ -754,6 +754,12 @@ case MVE_VQADDu16: case MVE_VQADDu32: case MVE_VQADDu8: + case MVE_VQDMULH_qr_s16: + case MVE_VQDMULH_qr_s32: + case MVE_VQDMULH_qr_s8: + case MVE_VQDMULHi16: + case MVE_VQDMULHi32: + case MVE_VQDMULHi8: case MVE_VQDMULL_qr_s16bh: case MVE_VQDMULL_qr_s16th: case MVE_VQDMULL_qr_s32bh: @@ -762,6 +768,12 @@ case MVE_VQDMULLs16th: case MVE_VQDMULLs32bh: case MVE_VQDMULLs32th: + case MVE_VQRDMULH_qr_s16: + case MVE_VQRDMULH_qr_s32: + case MVE_VQRDMULH_qr_s8: + case MVE_VQRDMULHi16: + case MVE_VQRDMULHi32: + case MVE_VQRDMULHi8: case MVE_VQNEGs16: case MVE_VQNEGs32: case MVE_VQNEGs8: