diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1851,6 +1851,14 @@ (i32 (EXTRACT_SUBREG MQPR:$src, (SSubReg_f32_reg imm:$lane))), rGPR)>; def : Pat<(insertelt (v4i32 MQPR:$src1), rGPR:$src2, imm:$lane), (MVE_VMOV_to_lane_32 MQPR:$src1, rGPR:$src2, imm:$lane)>; + // Lane-to-lane copy: when the inserted i32 element is itself extracted from another v4i32 vector, move it as an f32 subregister (EXTRACT_SUBREG feeding INSERT_SUBREG) without going via GPR regs + def : Pat<(insertelt (v4i32 MQPR:$src1), (extractelt (v4i32 MQPR:$src2), imm:$extlane), imm:$inslane), + (v4i32 (COPY_TO_REGCLASS + (INSERT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4i32 MQPR:$src1), MQPR)), + (f32 (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4i32 MQPR:$src2), MQPR)), + (SSubReg_f32_reg imm:$extlane))), + (SSubReg_f32_reg imm:$inslane)), + MQPR))>; def : Pat<(vector_insert (v16i8 MQPR:$src1), rGPR:$src2, imm:$lane), (MVE_VMOV_to_lane_8 MQPR:$src1, rGPR:$src2, imm:$lane)>; diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll @@ -55,7 +55,7 @@ ; CHECK-LABEL: add_v2i32_v2i64_sext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmov r2, s2 ; CHECK-NEXT: asrs r1, r0, #31 ; CHECK-NEXT: vmov.32 q1[1], r1 @@ -889,7 +889,7 @@ ; CHECK-LABEL: add_v2i32_v2i64_acc_sext: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov q1, q0 ; CHECK-NEXT: vmov r3, s2 ; CHECK-NEXT: asrs r2, r2, #31 ; CHECK-NEXT: vmov.32 q1[1], r2