diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -5625,7 +5625,7 @@ multiclass MVE_VMLA_qr_multi { - def "": MVE_VFMAMLA_qr; defvar Inst = !cast(NAME); defvar pred_int = !cast("int_arm_mve_" # iname # "_n_predicated"); @@ -5635,10 +5635,7 @@ defvar s = (i32 rGPR:$s); defvar pred = (VTI.Pred VCCR:$pred); - // The signed and unsigned variants of this instruction have different - // encodings, but they're functionally identical. For the sake of - // determinism, we generate only the unsigned variant. - if VTI.Unsigned then let Predicates = [HasMVEInt] in { + let Predicates = [HasMVEInt] in { if scalar_addend then { def : Pat<(VTI.Vec (add (mul v1, v2), vs)), (VTI.Vec (Inst v1, v2, s))>; @@ -5652,19 +5649,13 @@ } } -defm MVE_VMLA_qr_s8 : MVE_VMLA_qr_multi<"vmla", MVE_v16s8, 0b0>; -defm MVE_VMLA_qr_s16 : MVE_VMLA_qr_multi<"vmla", MVE_v8s16, 0b0>; -defm MVE_VMLA_qr_s32 : MVE_VMLA_qr_multi<"vmla", MVE_v4s32, 0b0>; -defm MVE_VMLA_qr_u8 : MVE_VMLA_qr_multi<"vmla", MVE_v16u8, 0b0>; -defm MVE_VMLA_qr_u16 : MVE_VMLA_qr_multi<"vmla", MVE_v8u16, 0b0>; -defm MVE_VMLA_qr_u32 : MVE_VMLA_qr_multi<"vmla", MVE_v4u32, 0b0>; +defm MVE_VMLA_qr_i8 : MVE_VMLA_qr_multi<"vmla", MVE_v16i8, 0b0>; +defm MVE_VMLA_qr_i16 : MVE_VMLA_qr_multi<"vmla", MVE_v8i16, 0b0>; +defm MVE_VMLA_qr_i32 : MVE_VMLA_qr_multi<"vmla", MVE_v4i32, 0b0>; -defm MVE_VMLAS_qr_s8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16s8, 0b1>; -defm MVE_VMLAS_qr_s16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8s16, 0b1>; -defm MVE_VMLAS_qr_s32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4s32, 0b1>; -defm MVE_VMLAS_qr_u8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16u8, 0b1>; -defm MVE_VMLAS_qr_u16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8u16, 0b1>; -defm MVE_VMLAS_qr_u32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4u32, 0b1>; +defm MVE_VMLAS_qr_i8 : MVE_VMLA_qr_multi<"vmlas", MVE_v16i8, 0b1>; +defm MVE_VMLAS_qr_i16 : MVE_VMLA_qr_multi<"vmlas", MVE_v8i16, 0b1>; +defm MVE_VMLAS_qr_i32 : MVE_VMLA_qr_multi<"vmlas", MVE_v4i32, 0b1>; multiclass MVE_VFMA_qr_multi { diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -24,7 +24,7 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.u32 q2, [r1], #4 ; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vmla.u32 q0, q2, r0 +; CHECK-NEXT: vmla.i32 q0, q2, r0 ; CHECK-NEXT: le lr, .LBB0_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q0, q1 @@ -89,7 +89,7 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.s32 q2, [r1], #8 ; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vmla.u32 q0, q2, r0 +; CHECK-NEXT: vmla.i32 q0, q2, r0 ; CHECK-NEXT: le lr, .LBB1_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q0, q1 @@ -154,7 +154,7 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrbt.u32 q2, [r1], #4 ; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vmla.u32 q0, q2, r0 +; CHECK-NEXT: vmla.i32 q0, q2, r0 ; CHECK-NEXT: le lr, .LBB2_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q0, q1 @@ -219,7 +219,7 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrht.u32 q2, [r1], #8 ; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vmla.u32 q0, q2, r0 +; CHECK-NEXT: vmla.i32 q0, q2, r0 ; CHECK-NEXT: le lr, .LBB3_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q0, q1 @@ -284,7 +284,7 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q2, [r1], #16 ; CHECK-NEXT: vmov q1, q0 -; CHECK-NEXT: vmla.u32 q0, q2, r0 +; CHECK-NEXT: vmla.i32 q0, q2, r0 ; CHECK-NEXT: le lr, .LBB4_2 ; CHECK-NEXT: @ %bb.3: @ %middle.block ; CHECK-NEXT: vpsel q0, q0, q1 @@ -361,7 +361,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u32 q0, [r0], #4 ; CHECK-NEXT: vldrb.u32 q1, [r1], #4 -; CHECK-NEXT: vmlas.u32 q1, q0, r2 +; CHECK-NEXT: vmlas.i32 q1, q0, r2 ; CHECK-NEXT: vstrw.32 q1, [r3], #16 ; CHECK-NEXT: letp lr, .LBB5_5 ; CHECK-NEXT: b .LBB5_11 @@ -559,7 +559,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.s32 q0, [r0], #8 ; CHECK-NEXT: vldrh.s32 q1, [r1], #8 -; CHECK-NEXT: vmlas.u32 q1, q0, r2 +; CHECK-NEXT: vmlas.i32 q1, q0, r2 ; CHECK-NEXT: vstrw.32 q1, [r3], #16 ; CHECK-NEXT: letp lr, .LBB6_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup @@ -637,7 +637,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrb.u32 q0, [r0], #4 ; CHECK-NEXT: vldrb.u32 q1, [r1], #4 -; CHECK-NEXT: vmlas.u32 q1, q0, r2 +; CHECK-NEXT: vmlas.i32 q1, q0, r2 ; CHECK-NEXT: vstrw.32 q1, [r3], #16 ; CHECK-NEXT: letp lr, .LBB7_5 ; CHECK-NEXT: b .LBB7_11 @@ -835,7 +835,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrh.u32 q0, [r0], #8 ; CHECK-NEXT: vldrh.u32 q1, [r1], #8 -; CHECK-NEXT: vmlas.u32 q1, q0, r2 +; CHECK-NEXT: vmlas.i32 q1, q0, r2 ; CHECK-NEXT: vstrw.32 q1, [r3], #16 ; CHECK-NEXT: letp lr, .LBB8_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup @@ -913,7 +913,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 -; CHECK-NEXT: vmlas.u32 q1, q0, r2 +; CHECK-NEXT: vmlas.i32 q1, q0, r2 ; CHECK-NEXT: vstrw.32 q1, [r3], #16 ; CHECK-NEXT: letp lr, .LBB9_5 ; CHECK-NEXT: b .LBB9_11 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/spillingmove.ll @@ -63,7 +63,7 @@ ; CHECK-NEXT: vshr.u16 q1, q0, #3 ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vmov q2, q4 -; CHECK-NEXT: vmla.u16 q2, q1, r2 +; CHECK-NEXT: vmla.i16 q2, q1, r2 ; CHECK-NEXT: vshr.u16 q1, q2, #5 ; CHECK-NEXT: vshl.i16 q2, q0, #3 ; CHECK-NEXT: vand q3, q1, q5 @@ -74,7 +74,7 @@ ; CHECK-NEXT: vmov q5, q4 ; CHECK-NEXT: vldrw.u32 q4, [sp, #48] @ 16-byte Reload ; CHECK-NEXT: vshr.u16 q0, q0, #9 -; CHECK-NEXT: vmla.u16 q4, q2, r2 +; CHECK-NEXT: vmla.i16 q4, q2, r2 ; CHECK-NEXT: vshr.u16 q2, q4, #11 ; CHECK-NEXT: vmov q4, q5 ; CHECK-NEXT: vmov q5, q6 @@ -83,7 +83,7 @@ ; CHECK-NEXT: vorr q1, q3, q2 ; CHECK-NEXT: vldrw.u32 q2, [sp, #16] @ 16-byte Reload ; CHECK-NEXT: vand q0, q0, q7 -; CHECK-NEXT: vmla.u16 q2, q0, r2 +; CHECK-NEXT: vmla.i16 q2, q0, r2 ; CHECK-NEXT: vldrw.u32 q0, [sp] @ 16-byte Reload ; CHECK-NEXT: vand q0, q2, q0 ; CHECK-NEXT: vldrw.u32 q2, [sp, #32] @ 16-byte Reload @@ -242,7 +242,7 @@ ; CHECK-NEXT: vshl.i16 q1, q0, #3 ; CHECK-NEXT: vand q1, q1, q2 ; CHECK-NEXT: vmov q2, q4 -; CHECK-NEXT: vmla.u16 q2, q1, r3 +; CHECK-NEXT: vmla.i16 q2, q1, r3 ; CHECK-NEXT: vshr.u16 q1, q0, #3 ; CHECK-NEXT: vand q1, q1, q5 ; CHECK-NEXT: vmov.f64 d14, d10 @@ -251,11 +251,11 @@ ; CHECK-NEXT: vmov.f64 d11, d9 ; CHECK-NEXT: vldrw.u32 q4, [sp, #32] @ 16-byte Reload ; CHECK-NEXT: vshr.u16 q0, q0, #9 -; CHECK-NEXT: vmla.u16 q4, q1, r3 +; CHECK-NEXT: vmla.i16 q4, q1, r3 ; CHECK-NEXT: vldrw.u32 q1, [sp, #48] @ 16-byte Reload ; CHECK-NEXT: vand q0, q0, q1 ; CHECK-NEXT: vldrw.u32 q1, [sp, #16] @ 16-byte Reload -; CHECK-NEXT: vmla.u16 q1, q0, r3 +; CHECK-NEXT: vmla.i16 q1, q0, r3 ; CHECK-NEXT: vshr.u16 q0, q2, #11 ; CHECK-NEXT: vshr.u16 q2, q4, #5 ; CHECK-NEXT: vand q2, q2, q6 diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir --- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir +++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vmldava_in_vpt.mir @@ -162,7 +162,7 @@ ; CHECK: renamable $r0, renamable $q3 = MVE_VLDRWU32_post killed renamable $r0, 4, 0, $noreg, $noreg :: (load (s128) from %ir.input_1_cast, align 4) ; CHECK: renamable $q2 = MVE_VADD_qr_i32 killed renamable $q2, renamable $r3, 0, $noreg, $noreg, undef renamable $q2 ; CHECK: renamable $q3 = MVE_VADD_qr_i32 killed renamable $q3, renamable $r2, 0, $noreg, $noreg, undef renamable $q3 - ; CHECK: renamable $q3 = MVE_VMLAS_qr_u32 killed renamable $q3, killed renamable $q2, renamable $r5, 0, $noreg, $noreg + ; CHECK: renamable $q3 = MVE_VMLAS_qr_i32 killed renamable $q3, killed renamable $q2, renamable $r5, 0, $noreg, $noreg ; CHECK: renamable $q2 = MVE_VMAXu32 killed renamable $q3, renamable $q1, 0, $noreg, $noreg, undef renamable $q2 ; CHECK: renamable $q3 = MVE_VMINu32 renamable $q2, renamable $q0, 0, $noreg, $noreg, undef renamable $q3 ; CHECK: renamable $r12 = MVE_VMLADAVas32 killed renamable $r12, killed renamable $q3, killed renamable $q2, 0, killed $noreg, $noreg @@ -210,7 +210,7 @@ renamable $q2 = MVE_VADD_qr_i32 killed renamable $q2, renamable $r3, 0, $noreg, $noreg, undef renamable $q2 renamable $q3 = MVE_VADD_qr_i32 killed renamable $q3, renamable $r2, 0, $noreg, $noreg, undef renamable $q3 renamable $r4, dead $cpsr = tSUBi8 killed renamable $r4, 4, 14 /* CC::al */, $noreg - renamable $q3 = MVE_VMLAS_qr_u32 killed renamable $q3, killed renamable $q2, renamable $r5, 0, $noreg, $noreg + renamable $q3 = MVE_VMLAS_qr_i32 killed renamable $q3, killed renamable $q2, renamable $r5, 0, $noreg, $noreg MVE_VPST 2, implicit $vpr renamable $q2 = MVE_VMAXu32 killed renamable $q3, renamable $q1, 1, renamable $vpr, $noreg, undef renamable $q2 renamable $q3 = MVE_VMINu32 renamable $q2, renamable $q0, 1, renamable $vpr, $noreg, undef renamable $q3 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-optimisation.ll @@ -339,7 +339,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmov q3, q0 ; CHECK-NEXT: vadd.i32 q2, q1, r4 -; CHECK-NEXT: vmla.u32 q3, q1, lr +; CHECK-NEXT: vmla.i32 q3, q1, lr ; CHECK-NEXT: vmul.i32 q1, q1, r12 ; CHECK-NEXT: vldrw.u32 q4, [q3, #24] ; CHECK-NEXT: subs r2, #4 @@ -401,11 +401,11 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vmov q4, q0 ; CHECK-NEXT: vadd.i32 q3, q2, r4 -; CHECK-NEXT: vmla.u32 q4, q2, lr +; CHECK-NEXT: vmla.i32 q4, q2, lr ; CHECK-NEXT: subs r2, #4 ; CHECK-NEXT: vldrw.u32 q5, [q4, #24] ; CHECK-NEXT: vmov q4, q1 -; CHECK-NEXT: vmla.u32 q4, q2, r12 +; CHECK-NEXT: vmla.i32 q4, q2, r12 ; CHECK-NEXT: vmov q2, q3 ; CHECK-NEXT: vstrb.8 q5, [r1], #16 ; CHECK-NEXT: vstrw.32 q4, [r3] @@ -490,7 +490,7 @@ ; CHECK-NEXT: dls lr, r10 ; CHECK-NEXT: vmov.i32 q4, #0x0 ; CHECK-NEXT: vadd.i32 q5, q5, q0 -; CHECK-NEXT: vmlas.u32 q6, q2, r5 +; CHECK-NEXT: vmlas.i32 q6, q2, r5 ; CHECK-NEXT: .LBB9_3: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB9_1 Depth=1 ; CHECK-NEXT: @ Parent Loop BB9_2 Depth=2 @@ -696,7 +696,7 @@ ; CHECK-NEXT: ldr r0, [sp, #16] @ 4-byte Reload ; CHECK-NEXT: vmov q5, q1 ; CHECK-NEXT: vmov.i32 q4, #0x0 -; CHECK-NEXT: vmlas.u32 q5, q2, r8 +; CHECK-NEXT: vmlas.i32 q5, q2, r8 ; CHECK-NEXT: dls lr, r0 ; CHECK-NEXT: ldr r3, [sp, #20] @ 4-byte Reload ; CHECK-NEXT: .LBB10_11: @ %vector.body @@ -909,11 +909,11 @@ ; CHECK-NEXT: vdup.32 q3, r5 ; CHECK-NEXT: vdup.32 q2, r7 ; CHECK-NEXT: vadd.i32 q4, q1, r4 -; CHECK-NEXT: vmla.u32 q3, q4, r2 +; CHECK-NEXT: vmla.i32 q3, q4, r2 ; CHECK-NEXT: adds r4, #113 ; CHECK-NEXT: vadd.i32 q4, q1, r4 ; CHECK-NEXT: mov r4, r8 -; CHECK-NEXT: vmla.u32 q2, q4, r2 +; CHECK-NEXT: vmla.i32 q2, q4, r2 ; CHECK-NEXT: .LBB11_5: @ %vector.body ; CHECK-NEXT: @ Parent Loop BB11_1 Depth=1 ; CHECK-NEXT: @ Parent Loop BB11_2 Depth=2 diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll --- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-tailpred.ll @@ -155,7 +155,7 @@ ; CHECK-NEXT: vpst ; CHECK-NEXT: vldrwt.u32 q2, [r4], #16 ; CHECK-NEXT: vmul.i32 q3, q2, r1 -; CHECK-NEXT: vmla.u32 q0, q2, r1 +; CHECK-NEXT: vmla.i32 q0, q2, r1 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrwt.32 q3, [q1, #80]! ; CHECK-NEXT: le lr, .LBB2_1 @@ -243,17 +243,17 @@ ; CHECK-NEXT: vldrb.u32 q5, [r0, q2] ; CHECK-NEXT: adds r0, #12 ; CHECK-NEXT: vmul.i32 q6, q4, r11 -; CHECK-NEXT: vmla.u32 q6, q3, r8 -; CHECK-NEXT: vmla.u32 q6, q5, r12 +; CHECK-NEXT: vmla.i32 q6, q3, r8 +; CHECK-NEXT: vmla.i32 q6, q5, r12 ; CHECK-NEXT: vadd.i32 q6, q6, r3 ; CHECK-NEXT: vshr.u32 q6, q6, #16 ; CHECK-NEXT: vstrb.32 q6, [r1, q1] ; CHECK-NEXT: vmul.i32 q6, q4, r4 ; CHECK-NEXT: vmul.i32 q4, q4, r10 -; CHECK-NEXT: vmla.u32 q6, q3, r5 -; CHECK-NEXT: vmla.u32 q4, q3, r7 -; CHECK-NEXT: vmla.u32 q6, q5, r6 -; CHECK-NEXT: vmla.u32 q4, q5, r9 +; CHECK-NEXT: vmla.i32 q6, q3, r5 +; CHECK-NEXT: vmla.i32 q4, q3, r7 +; CHECK-NEXT: vmla.i32 q6, q5, r6 +; CHECK-NEXT: vmla.i32 q4, q5, r9 ; CHECK-NEXT: vadd.i32 q6, q6, r3 ; CHECK-NEXT: vadd.i32 q3, q4, r3 ; CHECK-NEXT: vshr.u32 q6, q6, #16 diff --git a/llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll b/llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll --- a/llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll +++ b/llvm/test/CodeGen/Thumb2/mve-intrinsics/ternary.ll @@ -104,7 +104,7 @@ define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c) { ; CHECK-LABEL: test_vmlaq_n_s8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmla.u8 q0, q1, r0 +; CHECK-NEXT: vmla.i8 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %.splatinsert = insertelement <16 x i8> undef, i8 %c, i32 0 @@ -117,7 +117,7 @@ define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) { ; CHECK-LABEL: test_vmlaq_n_s16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmla.u16 q0, q1, r0 +; CHECK-NEXT: vmla.i16 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %.splatinsert = insertelement <8 x i16> undef, i16 %c, i32 0 @@ -130,7 +130,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) { ; CHECK-LABEL: test_vmlaq_n_s32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmla.u32 q0, q1, r0 +; CHECK-NEXT: vmla.i32 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %.splatinsert = insertelement <4 x i32> undef, i32 %c, i32 0 @@ -143,7 +143,7 @@ define arm_aapcs_vfpcc <16 x i8> @test_vmlaq_n_u8(<16 x i8> %a, <16 x i8> %b, i8 zeroext %c) { ; CHECK-LABEL: test_vmlaq_n_u8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmla.u8 q0, q1, r0 +; CHECK-NEXT: vmla.i8 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %.splatinsert = insertelement <16 x i8> undef, i8 %c, i32 0 @@ -156,7 +156,7 @@ define arm_aapcs_vfpcc <8 x i16> @test_vmlaq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) { ; CHECK-LABEL: test_vmlaq_n_u16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmla.u16 q0, q1, r0 +; CHECK-NEXT: vmla.i16 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %.splatinsert = insertelement <8 x i16> undef, i16 %c, i32 0 @@ -169,7 +169,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vmlaq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) { ; CHECK-LABEL: test_vmlaq_n_u32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmla.u32 q0, q1, r0 +; CHECK-NEXT: vmla.i32 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %.splatinsert = insertelement <4 x i32> undef, i32 %c, i32 0 @@ -182,7 +182,7 @@ define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_n_s8(<16 x i8> %a, <16 x i8> %b, i8 signext %c) { ; CHECK-LABEL: test_vmlasq_n_s8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmlas.u8 q1, q0, r0 +; CHECK-NEXT: vmlas.i8 q1, q0, r0 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -196,7 +196,7 @@ define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_n_s16(<8 x i16> %a, <8 x i16> %b, i16 signext %c) { ; CHECK-LABEL: test_vmlasq_n_s16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmlas.u16 q1, q0, r0 +; CHECK-NEXT: vmlas.i16 q1, q0, r0 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -210,7 +210,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_n_s32(<4 x i32> %a, <4 x i32> %b, i32 %c) { ; CHECK-LABEL: test_vmlasq_n_s32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmlas.u32 q1, q0, r0 +; CHECK-NEXT: vmlas.i32 q1, q0, r0 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -224,7 +224,7 @@ define arm_aapcs_vfpcc <16 x i8> @test_vmlasq_n_u8(<16 x i8> %a, <16 x i8> %b, i8 zeroext %c) { ; CHECK-LABEL: test_vmlasq_n_u8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmlas.u8 q1, q0, r0 +; CHECK-NEXT: vmlas.i8 q1, q0, r0 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -238,7 +238,7 @@ define arm_aapcs_vfpcc <8 x i16> @test_vmlasq_n_u16(<8 x i16> %a, <8 x i16> %b, i16 zeroext %c) { ; CHECK-LABEL: test_vmlasq_n_u16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmlas.u16 q1, q0, r0 +; CHECK-NEXT: vmlas.i16 q1, q0, r0 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -252,7 +252,7 @@ define arm_aapcs_vfpcc <4 x i32> @test_vmlasq_n_u32(<4 x i32> %a, <4 x i32> %b, i32 %c) { ; CHECK-LABEL: test_vmlasq_n_u32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmlas.u32 q1, q0, r0 +; CHECK-NEXT: vmlas.i32 q1, q0, r0 ; CHECK-NEXT: vmov q0, q1 ; CHECK-NEXT: bx lr entry: @@ -528,7 +528,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmlat.u8 q0, q1, r0 +; CHECK-NEXT: vmlat.i8 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = zext i8 %c to i32 @@ -543,7 +543,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmlat.u16 q0, q1, r0 +; CHECK-NEXT: vmlat.i16 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %c to i32 @@ -558,7 +558,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmlat.u32 q0, q1, r0 +; CHECK-NEXT: vmlat.i32 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 @@ -572,7 +572,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmlat.u8 q0, q1, r0 +; CHECK-NEXT: vmlat.i8 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = zext i8 %c to i32 @@ -587,7 +587,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmlat.u16 q0, q1, r0 +; CHECK-NEXT: vmlat.i16 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %c to i32 @@ -602,7 +602,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmlat.u32 q0, q1, r0 +; CHECK-NEXT: vmlat.i32 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 @@ -616,7 +616,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmlast.u8 q0, q1, r0 +; CHECK-NEXT: vmlast.i8 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = zext i8 %c to i32 @@ -631,7 +631,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmlast.u16 q0, q1, r0 +; CHECK-NEXT: vmlast.i16 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %c to i32 @@ -646,7 +646,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmlast.u32 q0, q1, r0 +; CHECK-NEXT: vmlast.i32 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 @@ -660,7 +660,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmlast.u8 q0, q1, r0 +; CHECK-NEXT: vmlast.i8 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = zext i8 %c to i32 @@ -675,7 +675,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmlast.u16 q0, q1, r0 +; CHECK-NEXT: vmlast.i16 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %c to i32 @@ -690,7 +690,7 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmsr p0, r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vmlast.u32 q0, q1, r0 +; CHECK-NEXT: vmlast.i32 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = zext i16 %p to i32 diff --git a/llvm/test/CodeGen/Thumb2/mve-qrintr.ll b/llvm/test/CodeGen/Thumb2/mve-qrintr.ll --- a/llvm/test/CodeGen/Thumb2/mve-qrintr.ll +++ b/llvm/test/CodeGen/Thumb2/mve-qrintr.ll @@ -989,7 +989,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 -; CHECK-NEXT: vmla.u32 q1, q0, r3 +; CHECK-NEXT: vmla.i32 q1, q0, r3 ; CHECK-NEXT: vstrw.32 q1, [r1], #16 ; CHECK-NEXT: letp lr, .LBB22_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup @@ -1036,7 +1036,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 -; CHECK-NEXT: vmla.u32 q1, q0, r3 +; CHECK-NEXT: vmla.i32 q1, q0, r3 ; CHECK-NEXT: vstrw.32 q1, [r1], #16 ; CHECK-NEXT: letp lr, .LBB23_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup @@ -1080,7 +1080,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: vmlas.u32 q1, q0, r3 +; CHECK-NEXT: vmlas.i32 q1, q0, r3 ; CHECK-NEXT: vstrw.32 q1, [r1], #16 ; CHECK-NEXT: letp lr, .LBB24_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup @@ -1127,7 +1127,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 -; CHECK-NEXT: vmlas.u32 q1, q0, r3 +; CHECK-NEXT: vmlas.i32 q1, q0, r3 ; CHECK-NEXT: vstrw.32 q1, [r1], #16 ; CHECK-NEXT: letp lr, .LBB25_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup diff --git a/llvm/test/CodeGen/Thumb2/mve-qrintrsplat.ll b/llvm/test/CodeGen/Thumb2/mve-qrintrsplat.ll --- a/llvm/test/CodeGen/Thumb2/mve-qrintrsplat.ll +++ b/llvm/test/CodeGen/Thumb2/mve-qrintrsplat.ll @@ -968,7 +968,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 -; CHECK-NEXT: vmla.u32 q1, q0, r3 +; CHECK-NEXT: vmla.i32 q1, q0, r3 ; CHECK-NEXT: vstrw.32 q1, [r1], #16 ; CHECK-NEXT: letp lr, .LBB22_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup @@ -1014,7 +1014,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 -; CHECK-NEXT: vmla.u32 q1, q0, r3 +; CHECK-NEXT: vmla.i32 q1, q0, r3 ; CHECK-NEXT: vstrw.32 q1, [r1], #16 ; CHECK-NEXT: letp lr, .LBB23_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup @@ -1059,7 +1059,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: vmlas.u32 q1, q0, r3 +; CHECK-NEXT: vmlas.i32 q1, q0, r3 ; CHECK-NEXT: vstrw.32 q1, [r1], #16 ; CHECK-NEXT: letp lr, .LBB24_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup @@ -1105,7 +1105,7 @@ ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r1] ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 -; CHECK-NEXT: vmlas.u32 q1, q0, r3 +; CHECK-NEXT: vmlas.i32 q1, q0, r3 ; CHECK-NEXT: vstrw.32 q1, [r1], #16 ; CHECK-NEXT: letp lr, .LBB25_2 ; CHECK-NEXT: @ %bb.3: @ %for.cond.cleanup diff --git a/llvm/test/CodeGen/Thumb2/mve-vmla.ll b/llvm/test/CodeGen/Thumb2/mve-vmla.ll --- a/llvm/test/CodeGen/Thumb2/mve-vmla.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmla.ll @@ -4,7 +4,7 @@ define arm_aapcs_vfpcc <4 x i32> @vmlau32(<4 x i32> %A, <4 x i32> %B, i32 %X) nounwind { ; CHECK-LABEL: vmlau32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmla.u32 q0, q1, r0 +; CHECK-NEXT: vmla.i32 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = insertelement <4 x i32> undef, i32 %X, i32 0 @@ -17,7 +17,7 @@ define arm_aapcs_vfpcc <4 x i32> @vmlau32b(<4 x i32> %A, <4 x i32> %B, i32 %X) nounwind { ; CHECK-LABEL: vmlau32b: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmla.u32 q0, q1, r0 +; CHECK-NEXT: vmla.i32 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = insertelement <4 x i32> undef, i32 %X, i32 0 @@ -30,7 +30,7 @@ define arm_aapcs_vfpcc <8 x i16> @vmlau16(<8 x i16> %A, <8 x i16> %B, i16 %X) nounwind { ; CHECK-LABEL: vmlau16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmla.u16 q0, q1, r0 +; CHECK-NEXT: vmla.i16 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = insertelement <8 x i16> undef, i16 %X, i32 0 @@ -43,7 +43,7 @@ define arm_aapcs_vfpcc <8 x i16> @vmlau16b(<8 x i16> %A, <8 x i16> %B, i16 %X) nounwind { ; CHECK-LABEL: vmlau16b: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmla.u16 q0, q1, r0 +; CHECK-NEXT: vmla.i16 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = insertelement <8 x i16> undef, i16 %X, i32 0 @@ -56,7 +56,7 @@ define arm_aapcs_vfpcc <16 x i8> @vmlau8(<16 x i8> %A, <16 x i8> %B, i8 %X) nounwind { ; CHECK-LABEL: vmlau8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmla.u8 q0, q1, r0 +; CHECK-NEXT: vmla.i8 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = insertelement <16 x i8> undef, i8 %X, i32 0 @@ -69,7 +69,7 @@ define arm_aapcs_vfpcc <16 x i8> @vmlau8b(<16 x i8> %A, <16 x i8> %B, i8 %X) nounwind { ; CHECK-LABEL: vmlau8b: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmla.u8 q0, q1, r0 +; CHECK-NEXT: vmla.i8 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = insertelement <16 x i8> undef, i8 %X, i32 0 @@ -87,7 +87,7 @@ ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r2] ; CHECK-NEXT: subs r3, #4 -; CHECK-NEXT: vmla.u32 q1, q0, r1 +; CHECK-NEXT: vmla.i32 q1, q0, r1 ; CHECK-NEXT: vstrb.8 q1, [r2], #16 ; CHECK-NEXT: bne .LBB6_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup @@ -125,7 +125,7 @@ ; CHECK-NEXT: vldrh.u16 q0, [r0], #16 ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: subs r3, #8 -; CHECK-NEXT: vmla.u16 q1, q0, r1 +; CHECK-NEXT: vmla.i16 q1, q0, r1 ; CHECK-NEXT: vstrb.8 q1, [r2], #16 ; CHECK-NEXT: bne .LBB7_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup @@ -163,7 +163,7 @@ ; CHECK-NEXT: vldrh.u16 q0, [r0], #16 ; CHECK-NEXT: vldrh.u16 q1, [r2] ; CHECK-NEXT: subs r3, #16 -; CHECK-NEXT: vmla.u8 q1, q0, r1 +; CHECK-NEXT: vmla.i8 q1, q0, r1 ; CHECK-NEXT: vstrb.8 q1, [r2], #16 ; CHECK-NEXT: bne .LBB8_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup @@ -197,7 +197,7 @@ define arm_aapcs_vfpcc <4 x i32> @vmlasu32(<4 x i32> %A, <4 x i32> %B, i32 %X) nounwind { ; CHECK-LABEL: vmlasu32: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmlas.u32 q0, q1, r0 +; CHECK-NEXT: vmlas.i32 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = insertelement <4 x i32> undef, i32 %X, i32 0 @@ -210,7 +210,7 @@ define arm_aapcs_vfpcc <4 x i32> @vmlasu32b(<4 x i32> %A, <4 x i32> %B, i32 %X) nounwind { ; CHECK-LABEL: vmlasu32b: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmlas.u32 q0, q1, r0 +; CHECK-NEXT: vmlas.i32 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = insertelement <4 x i32> undef, i32 %X, i32 0 @@ -223,7 +223,7 @@ define arm_aapcs_vfpcc <8 x i16> @vmlasu16(<8 x i16> %A, <8 x i16> %B, i16 %X) nounwind { ; CHECK-LABEL: vmlasu16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmlas.u16 q0, q1, r0 +; CHECK-NEXT: vmlas.i16 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = insertelement <8 x i16> undef, i16 %X, i32 0 @@ -236,7 +236,7 @@ define arm_aapcs_vfpcc <8 x i16> @vmlasu16b(<8 x i16> %A, <8 x i16> %B, i16 %X) nounwind { ; CHECK-LABEL: vmlasu16b: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmlas.u16 q0, q1, r0 +; CHECK-NEXT: vmlas.i16 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = insertelement <8 x i16> undef, i16 %X, i32 0 @@ -249,7 +249,7 @@ define arm_aapcs_vfpcc <16 x i8> @vmlasu8(<16 x i8> %A, <16 x i8> %B, i8 %X) nounwind { ; CHECK-LABEL: vmlasu8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmlas.u8 q0, q1, r0 +; CHECK-NEXT: vmlas.i8 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = insertelement <16 x i8> undef, i8 %X, i32 0 @@ -262,7 +262,7 @@ define arm_aapcs_vfpcc <16 x i8> @vmlasu8b(<16 x i8> %A, <16 x i8> %B, i8 %X) nounwind { ; CHECK-LABEL: vmlasu8b: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmlas.u8 q0, q1, r0 +; CHECK-NEXT: vmlas.i8 q0, q1, r0 ; CHECK-NEXT: bx lr entry: %0 = insertelement <16 x i8> undef, i8 %X, i32 0 @@ -280,7 +280,7 @@ ; CHECK-NEXT: vldrw.u32 q0, [r2] ; CHECK-NEXT: vldrw.u32 q1, [r0], #16 ; CHECK-NEXT: subs r3, #4 -; CHECK-NEXT: vmlas.u32 q1, q0, r1 +; CHECK-NEXT: vmlas.i32 q1, q0, r1 ; CHECK-NEXT: vstrb.8 q1, [r2], #16 ; CHECK-NEXT: bne .LBB15_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup @@ -318,7 +318,7 @@ ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vldrh.u16 q1, [r0], #16 ; CHECK-NEXT: subs r3, #8 -; CHECK-NEXT: vmlas.u16 q1, q0, r1 +; CHECK-NEXT: vmlas.i16 q1, q0, r1 ; CHECK-NEXT: vstrb.8 q1, [r2], #16 ; CHECK-NEXT: bne .LBB16_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup @@ -356,7 +356,7 @@ ; CHECK-NEXT: vldrh.u16 q0, [r2] ; CHECK-NEXT: vldrh.u16 q1, [r0], #16 ; CHECK-NEXT: subs r3, #16 -; CHECK-NEXT: vmlas.u8 q1, q0, r1 +; CHECK-NEXT: vmlas.i8 q1, q0, r1 ; CHECK-NEXT: vstrb.8 q1, [r2], #16 ; CHECK-NEXT: bne .LBB17_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup diff --git a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll --- a/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vmovlloop.ll @@ -167,7 +167,7 @@ ; CHECK-NEXT: vmovlb.u8 q2, q2 ; CHECK-NEXT: vmul.i16 q3, q2, q3 ; CHECK-NEXT: vldrb.u16 q2, [r0], #8 -; CHECK-NEXT: vmla.u16 q3, q1, r3 +; CHECK-NEXT: vmla.i16 q3, q1, r3 ; CHECK-NEXT: vldrb.u16 q1, [r2], #8 ; CHECK-NEXT: vshr.u16 q3, q3, #8 ; CHECK-NEXT: vstrb.16 q3, [r0, #-16] diff --git a/llvm/test/MC/ARM/mve-qdest-rsrc.s b/llvm/test/MC/ARM/mve-qdest-rsrc.s --- a/llvm/test/MC/ARM/mve-qdest-rsrc.s +++ b/llvm/test/MC/ARM/mve-qdest-rsrc.s @@ -316,28 +316,40 @@ # CHECK-NOFP-NOT: vfmas.f32 q0, q3, lr @ encoding: [0x37,0xee,0x4e,0x1e] vfmas.f32 q0, q3, lr -# CHECK: vmlas.s8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e] -# CHECK-NOFP: vmlas.s8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e] +# CHECK: vmlas.i8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e] +# CHECK-NOFP: vmlas.i8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e] +vmlas.i8 q0, q0, r6 + +# CHECK: vmlas.i16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e] +# CHECK-NOFP: vmlas.i16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e] +vmlas.i16 q0, q2, r9 + +# CHECK: vmlas.i32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e] +# CHECK-NOFP: vmlas.i32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e] +vmlas.i32 q0, q7, r6 + +# CHECK: vmlas.i8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e] +# CHECK-NOFP: vmlas.i8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e] vmlas.s8 q0, q0, r6 -# CHECK: vmlas.s16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e] -# CHECK-NOFP: vmlas.s16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e] +# CHECK: vmlas.i16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e] +# CHECK-NOFP: vmlas.i16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e] vmlas.s16 q0, q2, r9 -# CHECK: vmlas.s32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e] -# CHECK-NOFP: vmlas.s32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e] +# CHECK: vmlas.i32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e] +# CHECK-NOFP: vmlas.i32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e] vmlas.s32 q0, q7, r6 -# CHECK: vmlas.u8 q0, q5, lr @ encoding: [0x0b,0xfe,0x4e,0x1e] -# CHECK-NOFP: vmlas.u8 q0, q5, lr @ encoding: [0x0b,0xfe,0x4e,0x1e] +# CHECK: vmlas.i8 q0, q5, lr @ encoding: [0x0b,0xee,0x4e,0x1e] +# CHECK-NOFP: vmlas.i8 q0, q5, lr @ encoding: [0x0b,0xee,0x4e,0x1e] vmlas.u8 q0, q5, lr -# CHECK: vmlas.u16 q0, q3, r12 @ encoding: [0x17,0xfe,0x4c,0x1e] -# CHECK-NOFP: vmlas.u16 q0, q3, r12 @ encoding: [0x17,0xfe,0x4c,0x1e] +# CHECK: vmlas.i16 q0, q3, r12 @ encoding: [0x17,0xee,0x4c,0x1e] +# CHECK-NOFP: vmlas.i16 q0, q3, r12 @ encoding: [0x17,0xee,0x4c,0x1e] vmlas.u16 q0, q3, r12 -# CHECK: vmlas.u32 q1, q1, r11 @ encoding: [0x23,0xfe,0x4b,0x3e] -# CHECK-NOFP: vmlas.u32 q1, q1, r11 @ encoding: [0x23,0xfe,0x4b,0x3e] +# CHECK: vmlas.i32 q1, q1, r11 @ encoding: [0x23,0xee,0x4b,0x3e] +# CHECK-NOFP: vmlas.i32 q1, q1, r11 @ encoding: [0x23,0xee,0x4b,0x3e] vmlas.u32 q1, q1, r11 # CHECK: vfma.f16 q1, q1, r6 @ encoding: [0x33,0xfe,0x46,0x2e] @@ -348,28 +360,40 @@ # CHECK-NOFP-NOT: vfmas.f32 q7, q4, r6 @ encoding: [0x39,0xee,0x46,0xfe] vfmas.f32 q7, q4, r6 -# CHECK: vmla.s8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e] -# CHECK-NOFP: vmla.s8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e] +# CHECK: vmla.i8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e] +# CHECK-NOFP: vmla.i8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e] +vmla.i8 q0, q3, r8 + +# CHECK: vmla.i16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e] +# CHECK-NOFP: vmla.i16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e] +vmla.i16 q1, q3, r10 + +# CHECK: vmla.i32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e] +# CHECK-NOFP: vmla.i32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e] +vmla.i32 q1, q3, r1 + +# CHECK: vmla.i8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e] +# CHECK-NOFP: vmla.i8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e] vmla.s8 q0, q3, r8 -# CHECK: vmla.s16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e] -# CHECK-NOFP: vmla.s16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e] +# CHECK: vmla.i16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e] +# CHECK-NOFP: vmla.i16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e] vmla.s16 q1, q3, r10 -# CHECK: vmla.s32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e] -# CHECK-NOFP: vmla.s32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e] +# CHECK: vmla.i32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e] +# CHECK-NOFP: vmla.i32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e] vmla.s32 q1, q3, r1 -# CHECK: vmla.u8 q0, q7, r10 @ encoding: [0x0f,0xfe,0x4a,0x0e] -# CHECK-NOFP: vmla.u8 q0, q7, r10 @ encoding: [0x0f,0xfe,0x4a,0x0e] +# CHECK: vmla.i8 q0, q7, r10 @ encoding: [0x0f,0xee,0x4a,0x0e] +# CHECK-NOFP: vmla.i8 q0, q7, r10 @ encoding: [0x0f,0xee,0x4a,0x0e] vmla.u8 q0, q7, r10 -# CHECK: vmla.u16 q0, q0, r7 @ encoding: [0x11,0xfe,0x47,0x0e] -# CHECK-NOFP: vmla.u16 q0, q0, r7 @ encoding: [0x11,0xfe,0x47,0x0e] +# CHECK: vmla.i16 q0, q0, r7 @ encoding: [0x11,0xee,0x47,0x0e] +# CHECK-NOFP: vmla.i16 q0, q0, r7 @ encoding: [0x11,0xee,0x47,0x0e] vmla.u16 q0, q0, r7 -# CHECK: vmla.u32 q1, q6, r10 @ encoding: [0x2d,0xfe,0x4a,0x2e] -# CHECK-NOFP: vmla.u32 q1, q6, r10 @ encoding: [0x2d,0xfe,0x4a,0x2e] +# CHECK: vmla.i32 q1, q6, r10 @ encoding: [0x2d,0xee,0x4a,0x2e] +# CHECK-NOFP: vmla.i32 q1, q6, r10 @ encoding: [0x2d,0xee,0x4a,0x2e] vmla.u32 q1, q6, r10 # CHECK: vqdmlash.s8 q0, q0, r5 @ encoding: [0x00,0xee,0x65,0x1e] diff --git a/llvm/test/MC/Disassembler/ARM/mve-qdest-rsrc.txt b/llvm/test/MC/Disassembler/ARM/mve-qdest-rsrc.txt --- a/llvm/test/MC/Disassembler/ARM/mve-qdest-rsrc.txt +++ b/llvm/test/MC/Disassembler/ARM/mve-qdest-rsrc.txt @@ -315,27 +315,27 @@ # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x37,0xee,0x4e,0x1e] -# CHECK: vmlas.s8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e] +# CHECK: vmlas.i8 q0, q0, r6 @ encoding: [0x01,0xee,0x46,0x1e] # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x01,0xee,0x46,0x1e] -# CHECK: vmlas.s16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e] +# CHECK: vmlas.i16 q0, q2, r9 @ encoding: [0x15,0xee,0x49,0x1e] # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x15,0xee,0x49,0x1e] -# CHECK: vmlas.s32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e] +# CHECK: vmlas.i32 q0, q7, r6 @ encoding: [0x2f,0xee,0x46,0x1e] # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x2f,0xee,0x46,0x1e] -# CHECK: vmlas.u8 q0, q5, lr @ encoding: [0x0b,0xfe,0x4e,0x1e] +# ERROR: [[@LINE+2]]:2: warning: invalid instruction encoding # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x0b,0xfe,0x4e,0x1e] -# CHECK: vmlas.u16 q0, q3, r12 @ encoding: [0x17,0xfe,0x4c,0x1e] +# ERROR: [[@LINE+2]]:2: warning: invalid instruction encoding # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x17,0xfe,0x4c,0x1e] -# CHECK: vmlas.u32 q1, q1, r11 @ encoding: [0x23,0xfe,0x4b,0x3e] +# ERROR: [[@LINE+2]]:2: warning: invalid instruction encoding # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x23,0xfe,0x4b,0x3e] @@ -347,27 +347,27 @@ # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x39,0xee,0x46,0xfe] -# CHECK: vmla.s8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e] +# CHECK: vmla.i8 q0, q3, r8 @ encoding: [0x07,0xee,0x48,0x0e] # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x07,0xee,0x48,0x0e] -# CHECK: vmla.s16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e] +# CHECK: vmla.i16 q1, q3, r10 @ encoding: [0x17,0xee,0x4a,0x2e] # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x17,0xee,0x4a,0x2e] -# CHECK: vmla.s32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e] +# CHECK: vmla.i32 q1, q3, r1 @ encoding: [0x27,0xee,0x41,0x2e] # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x27,0xee,0x41,0x2e] -# CHECK: vmla.u8 q0, q7, r10 @ encoding: [0x0f,0xfe,0x4a,0x0e] +# ERROR: [[@LINE+2]]:2: warning: invalid instruction encoding # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x0f,0xfe,0x4a,0x0e] -# CHECK: vmla.u16 q0, q0, r7 @ encoding: [0x11,0xfe,0x47,0x0e] +# ERROR: [[@LINE+2]]:2: warning: invalid instruction encoding # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x11,0xfe,0x47,0x0e] -# CHECK: vmla.u32 q1, q6, r10 @ encoding: [0x2d,0xfe,0x4a,0x2e] +# ERROR: [[@LINE+2]]:2: warning: invalid instruction encoding # CHECK-NOMVE: [[@LINE+1]]:2: warning: invalid instruction encoding [0x2d,0xfe,0x4a,0x2e] diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll --- a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll @@ -803,7 +803,7 @@ ret i64 %r.0.lcssa } -; 4x to use VMLA.u32 +; 4x to use VMLA.i32 define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y, i32 %n) #0 { ; CHECK-LABEL: @mla_i32_i32( ; CHECK-NEXT: entry: @@ -856,7 +856,7 @@ ret i32 %r.0.lcssa } -; 8x to use VMLA.u16 +; 8x to use VMLA.i16 define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) #0 { ; CHECK-LABEL: @mla_i16_i32( ; CHECK-NEXT: entry: @@ -913,7 +913,7 @@ ret i32 %r.0.lcssa } -; 16x to use VMLA.u8 +; 16x to use VMLA.i8 define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 { ; CHECK-LABEL: @mla_i8_i32( ; CHECK-NEXT: entry: @@ -970,7 +970,7 @@ ret i32 %r.0.lcssa } -; 8x to use VMLA.u16 +; 8x to use VMLA.i16 define signext i16 @mla_i16_i16(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) #0 { ; CHECK-LABEL: @mla_i16_i16( ; CHECK-NEXT: entry: @@ -1023,7 +1023,7 @@ ret i16 %r.0.lcssa } -; 16x to use VMLA.u8 +; 16x to use VMLA.i8 define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 { ; CHECK-LABEL: @mla_i8_i16( ; CHECK-NEXT: entry: @@ -1080,7 +1080,7 @@ ret i16 %r.0.lcssa } -; 16x to use VMLA.u8 +; 16x to use VMLA.i8 define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 { ; CHECK-LABEL: @mla_i8_i8( ; CHECK-NEXT: entry: @@ -1430,7 +1430,7 @@ ret i32 %ret.lcssa } -; 16x to use VMLA.u8, same as mla_i8_i32 with multiple uses of the ext `add(mul(x, x))` +; 16x to use VMLA.i8, same as mla_i8_i32 with multiple uses of the ext `add(mul(x, x))` define i32 @mla_i8_i32_multiuse(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 { ; CHECK-LABEL: @mla_i8_i32_multiuse( ; CHECK-NEXT: entry: diff --git a/llvm/unittests/Target/ARM/MachineInstrTest.cpp b/llvm/unittests/Target/ARM/MachineInstrTest.cpp --- a/llvm/unittests/Target/ARM/MachineInstrTest.cpp +++ b/llvm/unittests/Target/ARM/MachineInstrTest.cpp @@ -583,18 +583,12 @@ case MVE_VRMLALDAVHu32: case MVE_VRMLSLDAVHas32: case MVE_VRMLSLDAVHs32: - case MVE_VMLAS_qr_s16: - case MVE_VMLAS_qr_s32: - case MVE_VMLAS_qr_s8: - case MVE_VMLAS_qr_u16: - case MVE_VMLAS_qr_u32: - case MVE_VMLAS_qr_u8: - case MVE_VMLA_qr_s16: - case MVE_VMLA_qr_s32: - case MVE_VMLA_qr_s8: - case MVE_VMLA_qr_u16: - case MVE_VMLA_qr_u32: - case MVE_VMLA_qr_u8: + case MVE_VMLAS_qr_i16: + case MVE_VMLAS_qr_i32: + case MVE_VMLAS_qr_i8: + case MVE_VMLA_qr_i16: + case MVE_VMLA_qr_i32: + case MVE_VMLA_qr_i8: case MVE_VHADD_qr_s16: case MVE_VHADD_qr_s32: case MVE_VHADD_qr_s8: @@ -1311,10 +1305,8 @@ case MVE_VMLADAVs8: case MVE_VMLADAVu8: case MVE_VMLADAVxs8: - case MVE_VMLAS_qr_s8: - case MVE_VMLAS_qr_u8: - case MVE_VMLA_qr_s8: - case MVE_VMLA_qr_u8: + case MVE_VMLAS_qr_i8: + case MVE_VMLA_qr_i8: case MVE_VMLSDAVas8: case MVE_VMLSDAVaxs8: case MVE_VMLSDAVs8: @@ -1542,10 +1534,8 @@ case MVE_VMLALDAVs16: case MVE_VMLALDAVu16: case MVE_VMLALDAVxs16: - case MVE_VMLAS_qr_s16: - case MVE_VMLAS_qr_u16: - case MVE_VMLA_qr_s16: - case MVE_VMLA_qr_u16: + case MVE_VMLAS_qr_i16: + case MVE_VMLA_qr_i16: case MVE_VMLSDAVas16: case MVE_VMLSDAVaxs16: case MVE_VMLSDAVs16: @@ -1856,10 +1846,8 @@ case MVE_VMLALDAVs32: case MVE_VMLALDAVu32: case MVE_VMLALDAVxs32: - case MVE_VMLAS_qr_s32: - case MVE_VMLAS_qr_u32: - case MVE_VMLA_qr_s32: - case MVE_VMLA_qr_u32: + case MVE_VMLAS_qr_i32: + case MVE_VMLA_qr_i32: case MVE_VMLSDAVas32: case MVE_VMLSDAVaxs32: case MVE_VMLSDAVs32: