Index: llvm/lib/Target/ARM/ARMISelLowering.h =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.h +++ llvm/lib/Target/ARM/ARMISelLowering.h @@ -578,6 +578,8 @@ CCAssignFn *CCAssignFnForCall(CallingConv::ID CC, bool isVarArg) const; CCAssignFn *CCAssignFnForReturn(CallingConv::ID CC, bool isVarArg) const; + TargetLoweringBase::LegalizeTypeAction getPreferredVectorAction(MVT VT) const override; + /// Returns true if \p VecTy is a legal interleaved access type. This /// function checks the vector element type and the overall width of the /// vector. Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -15209,6 +15209,17 @@ return Subtarget->isMClass(); } +TargetLoweringBase::LegalizeTypeAction +ARMTargetLowering::getPreferredVectorAction(MVT VT) const { + // Don't attempt to turn v2i32 into v2i64 and linearise from there. + // Instead widen the vector. + if (Subtarget->hasMVEIntegerOps() && + (VT == MVT::v2i8 || VT == MVT::v2i16 || VT == MVT::v2i32) + return TypeWidenVector; + + return TargetLoweringBase::getPreferredVectorAction(VT); + } + /// A helper function for determining the number of interleaved accesses we /// will generate when lowering accesses of the given type. 
unsigned Index: llvm/test/CodeGen/Thumb2/mve-add.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-add.ll +++ llvm/test/CodeGen/Thumb2/mve-add.ll @@ -4,25 +4,8 @@ define arm_aapcs_vfpcc <2 x i8> @add_v2i8(<2 x i8> %in1, <2 x i8> %in2) { ; CHECK-LABEL: add_v2i8: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov r3, s2 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: vmov r1, s3 -; CHECK-NEXT: adds.w lr, r3, r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov r3, s1 -; CHECK-NEXT: adc.w r12, r1, r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov r1, s5 -; CHECK-NEXT: adds r0, r0, r2 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: vmov.32 q0[0], r0 -; CHECK-NEXT: vmov.32 q0[1], r1 -; CHECK-NEXT: vmov.32 q0[2], lr -; CHECK-NEXT: vmov.32 q0[3], r12 -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: vadd.i8 q0, q0, q1 +; CHECK-NEXT: bx lr %out = add <2 x i8> %in1, %in2 ret <2 x i8> %out } @@ -80,25 +63,8 @@ define arm_aapcs_vfpcc <2 x i16> @add_v2i16(<2 x i16> %in1, <2 x i16> %in2) { ; CHECK-LABEL: add_v2i16: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov r3, s2 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: vmov r1, s3 -; CHECK-NEXT: adds.w lr, r3, r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov r3, s1 -; CHECK-NEXT: adc.w r12, r1, r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov r1, s5 -; CHECK-NEXT: adds r0, r0, r2 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: vmov.32 q0[0], r0 -; CHECK-NEXT: vmov.32 q0[1], r1 -; CHECK-NEXT: vmov.32 q0[2], lr -; CHECK-NEXT: vmov.32 q0[3], r12 -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: vadd.i16 q0, q0, q1 +; CHECK-NEXT: bx lr %out = add <2 x i16> %in1, %in2 ret <2 x i16> %out } @@ -159,25 +125,8 @@ define arm_aapcs_vfpcc <2 x i32> @add_v2i32(<2 x i32> %in1, <2 x i32> %in2) { ; CHECK-LABEL: add_v2i32: ; CHECK: @ %bb.0: -; CHECK-NEXT: .save {r7, lr} -; CHECK-NEXT: 
push {r7, lr} -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: vmov r3, s2 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: vmov r1, s3 -; CHECK-NEXT: adds.w lr, r3, r2 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov r3, s1 -; CHECK-NEXT: adc.w r12, r1, r0 -; CHECK-NEXT: vmov r0, s4 -; CHECK-NEXT: vmov r1, s5 -; CHECK-NEXT: adds r0, r0, r2 -; CHECK-NEXT: adcs r1, r3 -; CHECK-NEXT: vmov.32 q0[0], r0 -; CHECK-NEXT: vmov.32 q0[1], r1 -; CHECK-NEXT: vmov.32 q0[2], lr -; CHECK-NEXT: vmov.32 q0[3], r12 -; CHECK-NEXT: pop {r7, pc} +; CHECK-NEXT: vadd.i32 q0, q0, q1 +; CHECK-NEXT: bx lr %out = add <2 x i32> %in1, %in2 ret <2 x i32> %out }