diff --git a/llvm/lib/Target/ARM/ARMISelLowering.cpp b/llvm/lib/Target/ARM/ARMISelLowering.cpp --- a/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -977,6 +977,7 @@ setTargetDAGCombine(ISD::VECTOR_SHUFFLE); setTargetDAGCombine(ISD::INSERT_VECTOR_ELT); setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT); + setTargetDAGCombine(ISD::SIGN_EXTEND_INREG); setTargetDAGCombine(ISD::STORE); setTargetDAGCombine(ISD::SIGN_EXTEND); setTargetDAGCombine(ISD::ZERO_EXTEND); @@ -13985,6 +13986,20 @@ return SDValue(); } +static SDValue PerformSignExtendInregCombine(SDNode *N, SelectionDAG &DAG) { + SDValue Op = N->getOperand(0); + EVT VT = N->getValueType(0); + + // sext_inreg(VGETLANEu) -> VGETLANEs + if (Op.getOpcode() == ARMISD::VGETLANEu && + cast(N->getOperand(1))->getVT() == + Op.getOperand(0).getValueType().getScalarType()) + return DAG.getNode(ARMISD::VGETLANEs, SDLoc(N), VT, Op.getOperand(0), + Op.getOperand(1)); + + return SDValue(); +} + /// PerformVECTOR_SHUFFLECombine - Target-specific dag combine xforms for /// ISD::VECTOR_SHUFFLE. static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { @@ -16356,6 +16371,7 @@ case ISD::INSERT_VECTOR_ELT: return PerformInsertEltCombine(N, DCI); case ISD::EXTRACT_VECTOR_ELT: return PerformExtractEltCombine(N, DCI, Subtarget); + case ISD::SIGN_EXTEND_INREG: return PerformSignExtendInregCombine(N, DCI.DAG); case ISD::VECTOR_SHUFFLE: return PerformVECTOR_SHUFFLECombine(N, DCI.DAG); case ARMISD::VDUPLANE: return PerformVDUPLANECombine(N, DCI, Subtarget); case ARMISD::VDUP: return PerformVDUPCombine(N, DCI, Subtarget); diff --git a/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll b/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll --- a/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll +++ b/llvm/test/CodeGen/ARM/dagcombine-anyexttozeroext.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple armv7 %s -o - | FileCheck %s define float @f(<4 x i16>* nocapture %in) { @@ -64,12 +65,10 @@ } define float @i(<4 x i16>* nocapture %in) { - ; FIXME: The vmov.u + sxt can convert to a vmov.s ; CHECK-LABEL: i: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] -; CHECK-NEXT: vmov.u16 r0, d16[0] -; CHECK-NEXT: sxth r0, r0 +; CHECK-NEXT: vmov.s16 r0, d16[0] ; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vmov r0, s0 @@ -96,12 +95,10 @@ } define float @k(<8 x i8>* nocapture %in) { -; FIXME: The vmov.u + sxt can convert to a vmov.s ; CHECK-LABEL: k: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr d16, [r0] -; CHECK-NEXT: vmov.u8 r0, d16[7] -; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: vmov.s8 r0, d16[7] ; CHECK-NEXT: vmov s0, r0 ; CHECK-NEXT: vcvt.f32.s32 s0, s0 ; CHECK-NEXT: vmov r0, s0 diff --git a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll --- a/llvm/test/CodeGen/Thumb2/mve-div-expand.ll +++ b/llvm/test/CodeGen/Thumb2/mve-div-expand.ll @@ -154,58 +154,40 @@ define arm_aapcs_vfpcc <8 x i16> @sdiv_i16(<8 x i16> %in1, <8 x i16> %in2) { ; CHECK-LABEL: sdiv_i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, lr} -; CHECK-NEXT: push {r4, r5, r6, lr} -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.u16 r1, q0[3] -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: sxth r1, r1 -; CHECK-NEXT: vmov.u16 r2, q0[2] -; CHECK-NEXT: sdiv r12, r1, r0 -; CHECK-NEXT: vmov.u16 r1, q1[2] -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: sxth r1, r1 -; CHECK-NEXT: vmov.u16 r4, q1[6] -; CHECK-NEXT: sdiv r3, r2, r1 -; CHECK-NEXT: vmov.u16 r1, q1[1] -; CHECK-NEXT: vmov.u16 r2, q0[1] -; CHECK-NEXT: sxth r1, r1 -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: vmov.u16 r5, q0[6] -; CHECK-NEXT: sdiv r0, r2, r1 -; CHECK-NEXT: vmov.u16 r1, q1[0] -; CHECK-NEXT: vmov.u16 r2, q0[0] -; CHECK-NEXT: sxth r1, r1 -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: sxth r4, r4 +; CHECK-NEXT: vmov.s16 r0, q1[0] +; CHECK-NEXT: vmov.s16 r1, q0[0] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s16 r1, q1[1] +; CHECK-NEXT: vmov.s16 r2, q0[1] +; CHECK-NEXT: vmov.16 q2[0], r0 ; CHECK-NEXT: sdiv r1, r2, r1 -; CHECK-NEXT: vmov.u16 r2, q1[7] -; CHECK-NEXT: vmov.16 q2[0], r1 -; CHECK-NEXT: sxth.w lr, r2 -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov.u16 r2, q0[7] -; CHECK-NEXT: vmov.16 q2[2], r3 -; CHECK-NEXT: vmov.u16 r3, q1[4] -; CHECK-NEXT: sxth r6, r2 -; CHECK-NEXT: vmov.u16 r2, q0[4] -; CHECK-NEXT: vmov.u16 r1, q1[5] -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: sxth r3, r3 -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: sxth r1, r1 -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: vmov.16 q2[3], r12 -; CHECK-NEXT: sdiv r2, r2, r3 -; CHECK-NEXT: sxth r5, r5 -; CHECK-NEXT: vmov.16 q2[4], r2 -; CHECK-NEXT: sdiv r0, r0, r1 +; CHECK-NEXT: vmov.s16 r0, q1[2] +; CHECK-NEXT: vmov.16 q2[1], r1 +; CHECK-NEXT: vmov.s16 r1, q0[2] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s16 r1, q0[3] +; CHECK-NEXT: vmov.16 q2[2], r0 +; CHECK-NEXT: vmov.s16 r0, q1[3] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s16 r1, q0[4] +; CHECK-NEXT: vmov.16 q2[3], r0 +; CHECK-NEXT: vmov.s16 r0, q1[4] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s16 r1, q0[5] +; CHECK-NEXT: vmov.16 q2[4], r0 +; CHECK-NEXT: vmov.s16 r0, q1[5] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s16 r1, q0[6] ; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: sdiv r0, r5, r4 +; CHECK-NEXT: vmov.s16 r0, q1[6] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s16 r1, q0[7] ; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: sdiv r0, r6, lr +; CHECK-NEXT: vmov.s16 r0, q1[7] +; CHECK-NEXT: sdiv r0, r1, r0 ; CHECK-NEXT: vmov.16 q2[7], r0 ; CHECK-NEXT: vmov q0, q2 -; CHECK-NEXT: pop {r4, r5, r6, pc} +; CHECK-NEXT: bx lr entry: %out = sdiv <8 x i16> %in1, %in2 ret <8 x i16> %out @@ -265,65 +247,49 @@ define arm_aapcs_vfpcc <8 x i16> @srem_i16(<8 x i16> %in1, <8 x i16> %in2) { ; CHECK-LABEL: srem_i16: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} -; CHECK-NEXT: vmov.u16 r5, q1[6] -; CHECK-NEXT: vmov.u16 r6, q0[6] -; CHECK-NEXT: sxth r5, r5 -; CHECK-NEXT: sxth r6, r6 -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: sdiv r7, r6, r5 -; CHECK-NEXT: vmov.u16 r2, q1[7] -; CHECK-NEXT: sxth.w r8, r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: mls r12, r7, r5, r6 -; CHECK-NEXT: vmov.u16 r7, q0[7] -; CHECK-NEXT: sxth r3, r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: sxth r7, r7 -; CHECK-NEXT: sxth r4, r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: sdiv r6, r7, r2 -; CHECK-NEXT: mls lr, r6, r2, r7 -; CHECK-NEXT: vmov.u16 r2, q0[4] -; CHECK-NEXT: sxth r1, r0 -; CHECK-NEXT: vmov.u16 r0, q1[4] -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: sdiv r5, r2, r0 -; CHECK-NEXT: vmov.u16 r6, q0[1] -; CHECK-NEXT: mls r0, r5, r0, r2 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: sdiv r5, r2, r1 -; CHECK-NEXT: sxth r6, r6 -; CHECK-NEXT: mls r1, r5, r1, r2 -; CHECK-NEXT: vmov.u16 r2, q0[2] -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: sdiv r5, r2, r4 -; CHECK-NEXT: mls r2, r5, r4, r2 -; CHECK-NEXT: vmov.u16 r4, q0[3] -; CHECK-NEXT: sxth r4, r4 +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: vmov.s16 r0, q1[6] +; CHECK-NEXT: vmov.s16 r1, q0[6] +; CHECK-NEXT: sdiv r2, r1, r0 +; CHECK-NEXT: mls r12, r2, r0, r1 +; CHECK-NEXT: vmov.s16 r1, q1[7] +; CHECK-NEXT: vmov.s16 r2, q0[7] +; CHECK-NEXT: sdiv r3, r2, r1 +; CHECK-NEXT: mls lr, r3, r1, r2 +; CHECK-NEXT: vmov.s16 r2, q1[4] +; CHECK-NEXT: vmov.s16 r3, q0[4] +; CHECK-NEXT: sdiv r0, r3, r2 +; CHECK-NEXT: mls r2, r0, r2, r3 +; CHECK-NEXT: vmov.s16 r0, q1[5] +; CHECK-NEXT: vmov.s16 r3, q0[5] +; CHECK-NEXT: sdiv r1, r3, r0 +; CHECK-NEXT: mls r0, r1, r0, r3 +; CHECK-NEXT: vmov.s16 r1, q1[2] +; CHECK-NEXT: vmov.s16 r3, q0[2] +; CHECK-NEXT: sdiv r4, r3, r1 +; CHECK-NEXT: mls r1, r4, r1, r3 +; CHECK-NEXT: vmov.s16 r3, q1[3] +; CHECK-NEXT: vmov.s16 r4, q0[3] ; CHECK-NEXT: sdiv r5, r4, r3 ; CHECK-NEXT: mls r3, r5, r3, r4 -; CHECK-NEXT: vmov.u16 r4, q0[0] -; CHECK-NEXT: sxth r4, r4 -; CHECK-NEXT: sdiv r5, r4, r8 -; CHECK-NEXT: mls r4, r5, r8, r4 -; CHECK-NEXT: vmov.u16 r5, q1[1] -; CHECK-NEXT: sxth r5, r5 +; CHECK-NEXT: vmov.s16 r4, q1[0] +; CHECK-NEXT: vmov.s16 r5, q0[0] +; CHECK-NEXT: sdiv r6, r5, r4 +; CHECK-NEXT: mls r4, r6, r4, r5 +; CHECK-NEXT: vmov.s16 r6, q0[1] +; CHECK-NEXT: vmov.s16 r5, q1[1] ; CHECK-NEXT: sdiv r7, r6, r5 ; CHECK-NEXT: vmov.16 q0[0], r4 ; CHECK-NEXT: mls r5, r7, r5, r6 ; CHECK-NEXT: vmov.16 q0[1], r5 -; CHECK-NEXT: vmov.16 q0[2], r2 +; CHECK-NEXT: vmov.16 q0[2], r1 ; CHECK-NEXT: vmov.16 q0[3], r3 -; CHECK-NEXT: vmov.16 q0[4], r0 -; CHECK-NEXT: vmov.16 q0[5], r1 +; CHECK-NEXT: vmov.16 q0[4], r2 +; CHECK-NEXT: vmov.16 q0[5], r0 ; CHECK-NEXT: vmov.16 q0[6], r12 ; CHECK-NEXT: vmov.16 q0[7], lr -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} entry: %out = srem <8 x i16> %in1, %in2 ret <8 x i16> %out @@ -407,106 +373,72 @@ define arm_aapcs_vfpcc <16 x i8> @sdiv_i8(<16 x i8> %in1, <16 x i8> %in2) { ; CHECK-LABEL: sdiv_i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, lr} -; CHECK-NEXT: push {r4, r5, r6, r7, lr} -; CHECK-NEXT: vmov.u8 r0, q1[1] -; CHECK-NEXT: vmov.u8 r1, q0[1] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: vmov.u8 r2, q0[0] +; CHECK-NEXT: vmov.s8 r0, q1[0] +; CHECK-NEXT: vmov.s8 r1, q0[0] ; CHECK-NEXT: sdiv r0, r1, r0 -; CHECK-NEXT: vmov.u8 r1, q1[0] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: vmov.u8 r4, q1[3] +; CHECK-NEXT: vmov.s8 r1, q1[1] +; CHECK-NEXT: vmov.s8 r2, q0[1] +; CHECK-NEXT: vmov.8 q2[0], r0 ; CHECK-NEXT: sdiv r1, r2, r1 -; CHECK-NEXT: vmov.u8 r5, q0[3] -; CHECK-NEXT: vmov.8 q2[0], r1 -; CHECK-NEXT: vmov.u8 r1, q1[2] -; CHECK-NEXT: vmov.8 q2[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[2] -; CHECK-NEXT: vmov.u8 r2, q1[11] -; CHECK-NEXT: vmov.u8 r3, q0[11] -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sdiv r0, r0, r1 -; CHECK-NEXT: sxtb.w r12, r2 -; CHECK-NEXT: sxtb.w lr, r3 -; CHECK-NEXT: vmov.u8 r2, q1[4] -; CHECK-NEXT: vmov.u8 r3, q0[4] -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb r5, r5 +; CHECK-NEXT: vmov.s8 r0, q1[2] +; CHECK-NEXT: vmov.8 q2[1], r1 +; CHECK-NEXT: vmov.s8 r1, q0[2] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s8 r1, q0[3] ; CHECK-NEXT: vmov.8 q2[2], r0 -; CHECK-NEXT: sdiv r0, r5, r4 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r0, q1[3] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s8 r1, q0[4] ; CHECK-NEXT: vmov.8 q2[3], r0 -; CHECK-NEXT: sdiv r0, r3, r2 -; CHECK-NEXT: vmov.u8 r1, q0[10] +; CHECK-NEXT: vmov.s8 r0, q1[4] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s8 r1, q0[5] ; CHECK-NEXT: vmov.8 q2[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[10] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sdiv r12, lr, r12 -; CHECK-NEXT: sdiv lr, r1, r0 -; CHECK-NEXT: vmov.u8 r0, q1[9] -; CHECK-NEXT: vmov.u8 r1, q0[9] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sdiv r2, r1, r0 -; CHECK-NEXT: vmov.u8 r0, q1[8] -; CHECK-NEXT: vmov.u8 r1, q0[8] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: vmov.u8 r3, q0[7] -; CHECK-NEXT: sdiv r1, r1, r0 -; CHECK-NEXT: vmov.u8 r0, q1[7] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sdiv r4, r3, r0 -; CHECK-NEXT: vmov.u8 r0, q1[6] -; CHECK-NEXT: vmov.u8 r3, q0[6] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: vmov.u8 r6, q0[12] -; CHECK-NEXT: sdiv r5, r3, r0 -; CHECK-NEXT: vmov.u8 r0, q1[5] -; CHECK-NEXT: vmov.u8 r3, q0[5] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r6, r6 -; CHECK-NEXT: sdiv r0, r3, r0 -; CHECK-NEXT: vmov.u8 r3, q1[15] +; CHECK-NEXT: vmov.s8 r0, q1[5] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s8 r1, q0[6] ; CHECK-NEXT: vmov.8 q2[5], r0 -; CHECK-NEXT: sxtb r7, r3 -; CHECK-NEXT: vmov.8 q2[6], r5 -; CHECK-NEXT: vmov.u8 r3, q1[12] -; CHECK-NEXT: vmov.8 q2[7], r4 -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: vmov.8 q2[8], r1 -; CHECK-NEXT: vmov.u8 r1, q1[13] -; CHECK-NEXT: vmov.8 q2[9], r2 -; CHECK-NEXT: vmov.u8 r2, q0[13] -; CHECK-NEXT: vmov.8 q2[10], lr -; CHECK-NEXT: vmov.u8 r5, q1[14] -; CHECK-NEXT: vmov.u8 r4, q0[14] -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: vmov.8 q2[11], r12 -; CHECK-NEXT: sdiv r3, r6, r3 -; CHECK-NEXT: vmov.u8 r0, q0[15] -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: vmov.8 q2[12], r3 -; CHECK-NEXT: sdiv r1, r2, r1 -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: vmov.8 q2[13], r1 -; CHECK-NEXT: sdiv r1, r4, r5 -; CHECK-NEXT: sdiv r0, r0, r7 -; CHECK-NEXT: vmov.8 q2[14], r1 +; CHECK-NEXT: vmov.s8 r0, q1[6] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s8 r1, q0[7] +; CHECK-NEXT: vmov.8 q2[6], r0 +; CHECK-NEXT: vmov.s8 r0, q1[7] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s8 r1, q0[8] +; CHECK-NEXT: vmov.8 q2[7], r0 +; CHECK-NEXT: vmov.s8 r0, q1[8] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s8 r1, q0[9] +; CHECK-NEXT: vmov.8 q2[8], r0 +; CHECK-NEXT: vmov.s8 r0, q1[9] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s8 r1, q0[10] +; CHECK-NEXT: vmov.8 q2[9], r0 +; CHECK-NEXT: vmov.s8 r0, q1[10] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s8 r1, q0[11] +; CHECK-NEXT: vmov.8 q2[10], r0 +; CHECK-NEXT: vmov.s8 r0, q1[11] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s8 r1, q0[12] +; CHECK-NEXT: vmov.8 q2[11], r0 +; CHECK-NEXT: vmov.s8 r0, q1[12] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s8 r1, q0[13] +; CHECK-NEXT: vmov.8 q2[12], r0 +; CHECK-NEXT: vmov.s8 r0, q1[13] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s8 r1, q0[14] +; CHECK-NEXT: vmov.8 q2[13], r0 +; CHECK-NEXT: vmov.s8 r0, q1[14] +; CHECK-NEXT: sdiv r0, r1, r0 +; CHECK-NEXT: vmov.s8 r1, q0[15] +; CHECK-NEXT: vmov.8 q2[14], r0 +; CHECK-NEXT: vmov.s8 r0, q1[15] +; CHECK-NEXT: sdiv r0, r1, r0 ; CHECK-NEXT: vmov.8 q2[15], r0 ; CHECK-NEXT: vmov q0, q2 -; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +; CHECK-NEXT: bx lr entry: %out = sdiv <16 x i8> %in1, %in2 ret <16 x i8> %out @@ -607,122 +539,90 @@ define arm_aapcs_vfpcc <16 x i8> @srem_i8(<16 x i8> %in1, <16 x i8> %in2) { ; CHECK-LABEL: srem_i8: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, lr} -; CHECK-NEXT: vmov.u8 r5, q1[14] -; CHECK-NEXT: vmov.u8 r6, q0[14] -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: sxtb r6, r6 -; CHECK-NEXT: sdiv r7, r6, r5 -; CHECK-NEXT: vmov.u8 r4, q1[15] -; CHECK-NEXT: mls r12, r7, r5, r6 -; CHECK-NEXT: vmov.u8 r7, q0[15] -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: vmov.u8 r2, q1[13] -; CHECK-NEXT: sxtb r7, r7 -; CHECK-NEXT: sxtb r3, r2 -; CHECK-NEXT: sdiv r6, r7, r4 -; CHECK-NEXT: vmov.u8 r2, q1[12] -; CHECK-NEXT: mls lr, r6, r4, r7 -; CHECK-NEXT: vmov.u8 r4, q0[12] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: vmov.u8 r0, q1[8] -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb.w r8, r0 -; CHECK-NEXT: sdiv r5, r4, r2 -; CHECK-NEXT: vmov.u8 r0, q1[11] -; CHECK-NEXT: mls r9, r5, r2, r4 -; CHECK-NEXT: vmov.u8 r4, q0[13] -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: vmov.u8 r6, q0[0] -; CHECK-NEXT: sdiv r5, r4, r3 -; CHECK-NEXT: sxtb r1, r0 -; CHECK-NEXT: vmov.u8 r0, q1[10] -; CHECK-NEXT: mls r3, r5, r3, r4 -; CHECK-NEXT: vmov.u8 r4, q0[10] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb r6, r6 -; CHECK-NEXT: sdiv r5, r4, r0 -; CHECK-NEXT: mls r0, r5, r0, r4 -; CHECK-NEXT: vmov.u8 r4, q0[11] -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: .save {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, lr} +; CHECK-NEXT: vmov.s8 r0, q1[14] +; CHECK-NEXT: vmov.s8 r1, q0[14] +; CHECK-NEXT: sdiv r2, r1, r0 +; CHECK-NEXT: mls r12, r2, r0, r1 +; CHECK-NEXT: vmov.s8 r0, q1[15] +; CHECK-NEXT: vmov.s8 r1, q0[15] +; CHECK-NEXT: sdiv r2, r1, r0 +; CHECK-NEXT: mls lr, r2, r0, r1 +; CHECK-NEXT: vmov.s8 r0, q1[12] +; CHECK-NEXT: vmov.s8 r1, q0[12] +; CHECK-NEXT: sdiv r2, r1, r0 +; CHECK-NEXT: mls r8, r2, r0, r1 +; CHECK-NEXT: vmov.s8 r0, q1[13] +; CHECK-NEXT: vmov.s8 r1, q0[13] +; CHECK-NEXT: sdiv r3, r1, r0 +; CHECK-NEXT: mls r3, r3, r0, r1 +; CHECK-NEXT: vmov.s8 r0, q1[10] +; CHECK-NEXT: vmov.s8 r1, q0[10] +; CHECK-NEXT: sdiv r4, r1, r0 +; CHECK-NEXT: mls r0, r4, r0, r1 +; CHECK-NEXT: vmov.s8 r1, q1[11] +; CHECK-NEXT: vmov.s8 r4, q0[11] ; CHECK-NEXT: sdiv r5, r4, r1 ; CHECK-NEXT: mls r1, r5, r1, r4 -; CHECK-NEXT: vmov.u8 r4, q0[8] -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sdiv r5, r4, r8 -; CHECK-NEXT: mls r4, r5, r8, r4 -; CHECK-NEXT: vmov.u8 r5, q1[0] -; CHECK-NEXT: sxtb r5, r5 +; CHECK-NEXT: vmov.s8 r4, q1[8] +; CHECK-NEXT: vmov.s8 r5, q0[8] +; CHECK-NEXT: sdiv r6, r5, r4 +; CHECK-NEXT: mls r4, r6, r4, r5 +; CHECK-NEXT: vmov.s8 r5, q1[0] +; CHECK-NEXT: vmov.s8 r6, q0[0] ; CHECK-NEXT: sdiv r7, r6, r5 ; CHECK-NEXT: mls r5, r7, r5, r6 -; CHECK-NEXT: vmov.u8 r6, q1[1] -; CHECK-NEXT: vmov.u8 r7, q0[1] -; CHECK-NEXT: sxtb r6, r6 -; CHECK-NEXT: sxtb r7, r7 -; CHECK-NEXT: vmov.8 q2[0], r5 +; CHECK-NEXT: vmov.s8 r6, q1[1] +; CHECK-NEXT: vmov.s8 r7, q0[1] ; CHECK-NEXT: sdiv r2, r7, r6 -; CHECK-NEXT: vmov.u8 r5, q0[2] +; CHECK-NEXT: vmov.8 q2[0], r5 ; CHECK-NEXT: mls r2, r2, r6, r7 -; CHECK-NEXT: sxtb r5, r5 +; CHECK-NEXT: vmov.s8 r5, q0[2] ; CHECK-NEXT: vmov.8 q2[1], r2 -; CHECK-NEXT: vmov.u8 r2, q1[2] -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r2, q1[2] ; CHECK-NEXT: sdiv r6, r5, r2 ; CHECK-NEXT: mls r2, r6, r2, r5 -; CHECK-NEXT: vmov.u8 r5, q0[3] -; CHECK-NEXT: sxtb r5, r5 +; CHECK-NEXT: vmov.s8 r5, q0[3] ; CHECK-NEXT: vmov.8 q2[2], r2 -; CHECK-NEXT: vmov.u8 r2, q1[3] -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r2, q1[3] ; CHECK-NEXT: sdiv r6, r5, r2 ; CHECK-NEXT: mls r2, r6, r2, r5 -; CHECK-NEXT: vmov.u8 r5, q0[4] -; CHECK-NEXT: sxtb r5, r5 +; CHECK-NEXT: vmov.s8 r5, q0[4] ; CHECK-NEXT: vmov.8 q2[3], r2 -; CHECK-NEXT: vmov.u8 r2, q1[4] -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r2, q1[4] ; CHECK-NEXT: sdiv r6, r5, r2 ; CHECK-NEXT: mls r2, r6, r2, r5 -; CHECK-NEXT: vmov.u8 r5, q0[5] -; CHECK-NEXT: sxtb r5, r5 +; CHECK-NEXT: vmov.s8 r5, q0[5] ; CHECK-NEXT: vmov.8 q2[4], r2 -; CHECK-NEXT: vmov.u8 r2, q1[5] -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r2, q1[5] ; CHECK-NEXT: sdiv r6, r5, r2 ; CHECK-NEXT: mls r2, r6, r2, r5 -; CHECK-NEXT: vmov.u8 r5, q0[6] -; CHECK-NEXT: sxtb r5, r5 +; CHECK-NEXT: vmov.s8 r5, q0[6] ; CHECK-NEXT: vmov.8 q2[5], r2 -; CHECK-NEXT: vmov.u8 r2, q1[6] -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r2, q1[6] ; CHECK-NEXT: sdiv r6, r5, r2 ; CHECK-NEXT: mls r2, r6, r2, r5 -; CHECK-NEXT: vmov.u8 r5, q0[7] -; CHECK-NEXT: sxtb r5, r5 +; CHECK-NEXT: vmov.s8 r5, q0[7] ; CHECK-NEXT: vmov.8 q2[6], r2 -; CHECK-NEXT: vmov.u8 r2, q1[7] -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r2, q1[7] ; CHECK-NEXT: sdiv r6, r5, r2 ; CHECK-NEXT: mls r2, r6, r2, r5 -; CHECK-NEXT: vmov.u8 r5, q0[9] -; CHECK-NEXT: sxtb r5, r5 +; CHECK-NEXT: vmov.s8 r5, q0[9] ; CHECK-NEXT: vmov.8 q2[7], r2 -; CHECK-NEXT: vmov.u8 r2, q1[9] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: vmov.8 q2[8], r4 +; CHECK-NEXT: vmov.s8 r2, q1[9] ; CHECK-NEXT: sdiv r6, r5, r2 +; CHECK-NEXT: vmov.8 q2[8], r4 ; CHECK-NEXT: mls r2, r6, r2, r5 ; CHECK-NEXT: vmov.8 q2[9], r2 ; CHECK-NEXT: vmov.8 q2[10], r0 ; CHECK-NEXT: vmov.8 q2[11], r1 -; CHECK-NEXT: vmov.8 q2[12], r9 +; CHECK-NEXT: vmov.8 q2[12], r8 ; CHECK-NEXT: vmov.8 q2[13], r3 ; CHECK-NEXT: vmov.8 q2[14], r12 ; CHECK-NEXT: vmov.8 q2[15], lr ; CHECK-NEXT: vmov q0, q2 -; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, pc} +; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, pc} entry: %out = srem <16 x i8> %in1, %in2 ret <16 x i8> %out diff --git a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll --- a/llvm/test/CodeGen/Thumb2/mve-vcvt.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vcvt.ll @@ -91,10 +91,8 @@ define arm_aapcs_vfpcc <8 x half> @foo_half_int16(<8 x i16> %src) { ; CHECK-MVE-LABEL: foo_half_int16: ; CHECK-MVE: @ %bb.0: @ %entry -; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] -; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] -; CHECK-MVE-NEXT: sxth r0, r0 -; CHECK-MVE-NEXT: sxth r1, r1 +; CHECK-MVE-NEXT: vmov.s16 r0, q0[0] +; CHECK-MVE-NEXT: vmov.s16 r1, q0[1] ; CHECK-MVE-NEXT: vmov s4, r0 ; CHECK-MVE-NEXT: vcvt.f16.s32 s4, s4 ; CHECK-MVE-NEXT: vmov r0, s4 @@ -102,39 +100,33 @@ ; CHECK-MVE-NEXT: vcvt.f16.s32 s4, s4 ; CHECK-MVE-NEXT: vmov r1, s4 ; CHECK-MVE-NEXT: vmov.16 q1[0], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.s16 r0, q0[2] ; CHECK-MVE-NEXT: vmov.16 q1[1], r1 -; CHECK-MVE-NEXT: sxth r0, r0 ; CHECK-MVE-NEXT: vmov s8, r0 ; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 ; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmov.16 q1[2], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] -; CHECK-MVE-NEXT: sxth r0, r0 +; CHECK-MVE-NEXT: vmov.s16 r0, q0[3] ; CHECK-MVE-NEXT: vmov s8, r0 ; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 ; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmov.16 q1[3], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] -; CHECK-MVE-NEXT: sxth r0, r0 +; CHECK-MVE-NEXT: vmov.s16 r0, q0[4] ; CHECK-MVE-NEXT: vmov s8, r0 ; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 ; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmov.16 q1[4], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] -; CHECK-MVE-NEXT: sxth r0, r0 +; CHECK-MVE-NEXT: vmov.s16 r0, q0[5] ; CHECK-MVE-NEXT: vmov s8, r0 ; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 ; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmov.16 q1[5], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] -; CHECK-MVE-NEXT: sxth r0, r0 +; CHECK-MVE-NEXT: vmov.s16 r0, q0[6] ; CHECK-MVE-NEXT: vmov s8, r0 ; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 ; CHECK-MVE-NEXT: vmov r0, s8 ; CHECK-MVE-NEXT: vmov.16 q1[6], r0 -; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] -; CHECK-MVE-NEXT: sxth r0, r0 +; CHECK-MVE-NEXT: vmov.s16 r0, q0[7] ; CHECK-MVE-NEXT: vmov s0, r0 ; CHECK-MVE-NEXT: vcvt.f16.s32 s0, s0 ; CHECK-MVE-NEXT: vmov r0, s0 diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-add.ll @@ -178,10 +178,8 @@ define arm_aapcs_vfpcc i64 @add_v8i16_v8i64_sext(<8 x i16> %x) { ; CHECK-LABEL: add_v8i16_v8i64_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.u16 r1, q0[0] -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: sxth r1, r1 +; CHECK-NEXT: vmov.s16 r0, q0[1] +; CHECK-NEXT: vmov.s16 r1, q0[0] ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 ; CHECK-NEXT: asrs r2, r0, #31 ; CHECK-NEXT: asrs r1, r1, #31 @@ -190,11 +188,9 @@ ; CHECK-NEXT: vmov r3, s4 ; CHECK-NEXT: vmov r1, s5 ; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: vmov.u16 r3, q0[2] +; CHECK-NEXT: vmov.s16 r3, q0[2] ; CHECK-NEXT: adc.w r12, r1, r0, asr #31 -; CHECK-NEXT: vmov.u16 r1, q0[3] -; CHECK-NEXT: sxth r1, r1 -; CHECK-NEXT: sxth r3, r3 +; CHECK-NEXT: vmov.s16 r1, q0[3] ; CHECK-NEXT: vmov q1[2], q1[0], r3, r1 ; CHECK-NEXT: asrs r0, r1, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -205,11 +201,9 @@ ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: adc.w r0, r0, r12 ; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: vmov.u16 r3, q0[4] +; CHECK-NEXT: vmov.s16 r3, q0[4] ; CHECK-NEXT: adc.w r12, r0, r1, asr #31 -; CHECK-NEXT: vmov.u16 r1, q0[5] -; CHECK-NEXT: sxth r1, r1 -; CHECK-NEXT: sxth r3, r3 +; CHECK-NEXT: vmov.s16 r1, q0[5] ; CHECK-NEXT: vmov q1[2], q1[0], r3, r1 ; CHECK-NEXT: asrs r0, r1, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -220,13 +214,11 @@ ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: adc.w r0, r0, r12 ; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: vmov.s16 r3, q0[7] ; CHECK-NEXT: adc.w r0, r0, r1, asr #31 -; CHECK-NEXT: vmov.u16 r1, q0[6] -; CHECK-NEXT: sxth r1, r1 +; CHECK-NEXT: vmov.s16 r1, q0[6] ; CHECK-NEXT: adds r2, r2, r1 ; CHECK-NEXT: adc.w r1, r0, r1, asr #31 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: sxth r3, r0 ; CHECK-NEXT: adds r0, r2, r3 ; CHECK-NEXT: adc.w r1, r1, r3, asr #31 ; CHECK-NEXT: bx lr @@ -545,10 +537,8 @@ define arm_aapcs_vfpcc i64 @add_v16i8_v16i64_sext(<16 x i8> %x) { ; CHECK-LABEL: add_v16i8_v16i64_sext: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: vmov.u8 r0, q0[1] -; CHECK-NEXT: vmov.u8 r1, q0[0] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r1, r1 +; CHECK-NEXT: vmov.s8 r0, q0[1] +; CHECK-NEXT: vmov.s8 r1, q0[0] ; CHECK-NEXT: vmov q1[2], q1[0], r1, r0 ; CHECK-NEXT: asrs r2, r0, #31 ; CHECK-NEXT: asrs r1, r1, #31 @@ -557,11 +547,9 @@ ; CHECK-NEXT: vmov r3, s4 ; CHECK-NEXT: vmov r1, s5 ; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: vmov.u8 r3, q0[2] +; CHECK-NEXT: vmov.s8 r3, q0[2] ; CHECK-NEXT: adc.w r12, r1, r0, asr #31 -; CHECK-NEXT: vmov.u8 r1, q0[3] -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r1, q0[3] ; CHECK-NEXT: vmov q1[2], q1[0], r3, r1 ; CHECK-NEXT: asrs r0, r1, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -572,11 +560,9 @@ ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: adc.w r0, r0, r12 ; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: vmov.u8 r3, q0[4] +; CHECK-NEXT: vmov.s8 r3, q0[4] ; CHECK-NEXT: adc.w r12, r0, r1, asr #31 -; CHECK-NEXT: vmov.u8 r1, q0[5] -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r1, q0[5] ; CHECK-NEXT: vmov q1[2], q1[0], r3, r1 ; CHECK-NEXT: asrs r0, r1, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -587,11 +573,9 @@ ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: adc.w r0, r0, r12 ; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: vmov.u8 r3, q0[6] +; CHECK-NEXT: vmov.s8 r3, q0[6] ; CHECK-NEXT: adc.w r12, r0, r1, asr #31 -; CHECK-NEXT: vmov.u8 r1, q0[7] -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r1, q0[7] ; CHECK-NEXT: vmov q1[2], q1[0], r3, r1 ; CHECK-NEXT: asrs r0, r1, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -602,11 +586,9 @@ ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: adc.w r0, r0, r12 ; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: vmov.u8 r3, q0[8] +; CHECK-NEXT: vmov.s8 r3, q0[8] ; CHECK-NEXT: adc.w r12, r0, r1, asr #31 -; CHECK-NEXT: vmov.u8 r1, q0[9] -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r1, q0[9] ; CHECK-NEXT: vmov q1[2], q1[0], r3, r1 ; CHECK-NEXT: asrs r0, r1, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -617,11 +599,9 @@ ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: adc.w r0, r0, r12 ; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: vmov.u8 r3, q0[10] +; CHECK-NEXT: vmov.s8 r3, q0[10] ; CHECK-NEXT: adc.w r12, r0, r1, asr #31 -; CHECK-NEXT: vmov.u8 r1, q0[11] -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r1, q0[11] ; CHECK-NEXT: vmov q1[2], q1[0], r3, r1 ; CHECK-NEXT: asrs r0, r1, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -632,11 +612,9 @@ ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: adc.w r0, r0, r12 ; CHECK-NEXT: adds r2, r2, r3 -; CHECK-NEXT: vmov.u8 r3, q0[12] +; CHECK-NEXT: vmov.s8 r3, q0[12] ; CHECK-NEXT: adc.w r12, r0, r1, asr #31 -; CHECK-NEXT: vmov.u8 r1, q0[13] -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r1, q0[13] ; CHECK-NEXT: vmov q1[2], q1[0], r3, r1 ; CHECK-NEXT: asrs r0, r1, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -647,13 +625,11 @@ ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: adc.w r0, r0, r12 ; CHECK-NEXT: adds r2, r2, r3 +; CHECK-NEXT: vmov.s8 r3, q0[15] ; CHECK-NEXT: adc.w r0, r0, r1, asr #31 -; CHECK-NEXT: vmov.u8 r1, q0[14] -; CHECK-NEXT: sxtb r1, r1 +; CHECK-NEXT: vmov.s8 r1, q0[14] ; CHECK-NEXT: adds r2, r2, r1 ; CHECK-NEXT: adc.w r1, r0, r1, asr #31 -; CHECK-NEXT: vmov.u8 r0, q0[15] -; CHECK-NEXT: sxtb r3, r0 ; CHECK-NEXT: adds r0, r2, r3 ; CHECK-NEXT: adc.w r1, r1, r3, asr #31 ; CHECK-NEXT: bx lr @@ -1051,10 +1027,8 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: vmov.u16 r2, q0[1] -; CHECK-NEXT: vmov.u16 r3, q0[0] -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: sxth r3, r3 +; CHECK-NEXT: vmov.s16 r2, q0[1] +; CHECK-NEXT: vmov.s16 r3, q0[0] ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2 ; CHECK-NEXT: asr.w r12, r2, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -1063,11 +1037,9 @@ ; CHECK-NEXT: vmov r3, s4 ; CHECK-NEXT: vmov r12, s5 ; CHECK-NEXT: adds.w lr, lr, r3 -; CHECK-NEXT: vmov.u16 r3, q0[2] +; CHECK-NEXT: vmov.s16 r3, q0[2] ; CHECK-NEXT: adc.w r12, r12, r2, asr #31 -; CHECK-NEXT: vmov.u16 r2, q0[3] -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: sxth r3, r3 +; CHECK-NEXT: vmov.s16 r2, q0[3] ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2 ; CHECK-NEXT: asrs r4, r2, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -1078,11 +1050,9 @@ ; CHECK-NEXT: adc.w r12, r12, r3 ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: adds.w lr, r4, r3 -; CHECK-NEXT: vmov.u16 r4, q0[5] +; CHECK-NEXT: vmov.s16 r4, q0[5] ; CHECK-NEXT: adc.w r12, r12, r2, asr #31 -; CHECK-NEXT: vmov.u16 r2, q0[4] -; CHECK-NEXT: sxth r4, r4 -; CHECK-NEXT: sxth r2, r2 +; CHECK-NEXT: vmov.s16 r2, q0[4] ; CHECK-NEXT: vmov q1[2], q1[0], r2, r4 ; CHECK-NEXT: asrs r3, r4, #31 ; CHECK-NEXT: asrs r2, r2, #31 @@ -1094,12 +1064,10 @@ ; CHECK-NEXT: vmov r2, s6 ; CHECK-NEXT: adds r2, r2, r3 ; CHECK-NEXT: adc.w r3, r12, r4, asr #31 -; CHECK-NEXT: vmov.u16 r4, q0[6] -; CHECK-NEXT: sxth r4, r4 +; CHECK-NEXT: vmov.s16 r4, q0[6] ; CHECK-NEXT: adds r2, r2, r4 ; CHECK-NEXT: adc.w r3, r3, r4, asr #31 -; CHECK-NEXT: vmov.u16 r4, q0[7] -; CHECK-NEXT: sxth r4, r4 +; CHECK-NEXT: vmov.s16 r4, q0[7] ; CHECK-NEXT: adds r2, r2, r4 ; CHECK-NEXT: adc.w r3, r3, r4, asr #31 ; CHECK-NEXT: adds r0, r0, r2 @@ -1447,10 +1415,8 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, lr} ; CHECK-NEXT: push {r4, lr} -; CHECK-NEXT: vmov.u8 r2, q0[1] -; CHECK-NEXT: vmov.u8 r3, q0[0] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r2, q0[1] +; CHECK-NEXT: vmov.s8 r3, q0[0] ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2 ; CHECK-NEXT: asr.w r12, r2, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -1459,11 +1425,9 @@ ; CHECK-NEXT: vmov r3, s4 ; CHECK-NEXT: vmov r12, s5 ; CHECK-NEXT: adds.w lr, lr, r3 -; CHECK-NEXT: vmov.u8 r3, q0[2] +; CHECK-NEXT: vmov.s8 r3, q0[2] ; CHECK-NEXT: adc.w r12, r12, r2, asr #31 -; CHECK-NEXT: vmov.u8 r2, q0[3] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r2, q0[3] ; CHECK-NEXT: vmov q1[2], q1[0], r3, r2 ; CHECK-NEXT: asrs r4, r2, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -1474,11 +1438,9 @@ ; CHECK-NEXT: adc.w r12, r12, r3 ; CHECK-NEXT: vmov r3, s6 ; CHECK-NEXT: adds.w lr, r4, r3 -; CHECK-NEXT: vmov.u8 r4, q0[5] +; CHECK-NEXT: vmov.s8 r4, q0[5] ; CHECK-NEXT: adc.w r12, r12, r2, asr #31 -; CHECK-NEXT: vmov.u8 r2, q0[4] -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r2, q0[4] ; CHECK-NEXT: vmov q1[2], q1[0], r2, r4 ; CHECK-NEXT: asrs r3, r4, #31 ; CHECK-NEXT: asrs r2, r2, #31 @@ -1489,11 +1451,9 @@ ; CHECK-NEXT: adc.w r12, r12, r2 ; CHECK-NEXT: vmov r2, s6 ; CHECK-NEXT: adds.w lr, r3, r2 -; CHECK-NEXT: vmov.u8 r2, q0[6] +; CHECK-NEXT: vmov.s8 r2, q0[6] ; CHECK-NEXT: adc.w r12, r12, r4, asr #31 -; CHECK-NEXT: vmov.u8 r4, q0[7] -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r4, q0[7] ; CHECK-NEXT: vmov q1[2], q1[0], r2, r4 ; CHECK-NEXT: asrs r3, r4, #31 ; CHECK-NEXT: asrs r2, r2, #31 @@ -1504,11 +1464,9 @@ ; CHECK-NEXT: adc.w r12, r12, r2 ; CHECK-NEXT: vmov r2, s6 ; CHECK-NEXT: adds.w lr, r3, r2 -; CHECK-NEXT: vmov.u8 r2, q0[8] +; CHECK-NEXT: vmov.s8 r2, q0[8] ; CHECK-NEXT: adc.w r12, r12, r4, asr #31 -; CHECK-NEXT: vmov.u8 r4, q0[9] -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r4, q0[9] ; CHECK-NEXT: vmov q1[2], q1[0], r2, r4 ; CHECK-NEXT: asrs r3, r4, #31 ; CHECK-NEXT: asrs r2, r2, #31 @@ -1519,11 +1477,9 @@ ; CHECK-NEXT: adc.w r12, r12, r2 ; CHECK-NEXT: vmov r2, s6 ; CHECK-NEXT: adds.w lr, r3, r2 -; CHECK-NEXT: vmov.u8 r2, q0[10] +; CHECK-NEXT: vmov.s8 r2, q0[10] ; CHECK-NEXT: adc.w r12, r12, r4, asr #31 -; CHECK-NEXT: vmov.u8 r4, q0[11] -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r4, q0[11] ; CHECK-NEXT: vmov q1[2], q1[0], r2, r4 ; CHECK-NEXT: asrs r3, r4, #31 ; CHECK-NEXT: asrs r2, r2, #31 @@ -1534,11 +1490,9 @@ ; CHECK-NEXT: adc.w r12, r12, r2 ; CHECK-NEXT: vmov r2, s6 ; CHECK-NEXT: adds.w lr, r3, r2 -; CHECK-NEXT: vmov.u8 r2, q0[12] +; CHECK-NEXT: vmov.s8 r2, q0[12] ; CHECK-NEXT: adc.w r12, r12, r4, asr #31 -; CHECK-NEXT: vmov.u8 r4, q0[13] -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r4, q0[13] ; CHECK-NEXT: vmov q1[2], q1[0], r2, r4 ; CHECK-NEXT: asrs r3, r4, #31 ; CHECK-NEXT: asrs r2, r2, #31 @@ -1550,12 +1504,10 @@ ; CHECK-NEXT: vmov r2, s6 ; CHECK-NEXT: adds r2, r2, r3 ; CHECK-NEXT: adc.w r3, r12, r4, asr #31 -; CHECK-NEXT: vmov.u8 r4, q0[14] -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r4, q0[14] ; CHECK-NEXT: adds r2, r2, r4 ; CHECK-NEXT: adc.w r3, r3, r4, asr #31 -; CHECK-NEXT: vmov.u8 r4, q0[15] -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r4, q0[15] ; CHECK-NEXT: adds r2, r2, r4 ; CHECK-NEXT: adc.w r3, r3, r4, asr #31 ; CHECK-NEXT: adds r0, r0, r2 diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-addpred.ll @@ -317,10 +317,8 @@ ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r1 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r1 -; CHECK-NEXT: vmov.u16 r1, q0[1] -; CHECK-NEXT: vmov.u16 r2, q0[0] -; CHECK-NEXT: sxth r1, r1 -; CHECK-NEXT: sxth r2, r2 +; CHECK-NEXT: vmov.s16 r1, q0[1] +; CHECK-NEXT: vmov.s16 r2, q0[0] ; CHECK-NEXT: vmov q3[2], q3[0], r2, r1 ; CHECK-NEXT: asrs r1, r1, #31 ; CHECK-NEXT: asrs r2, r2, #31 @@ -338,10 +336,8 @@ ; CHECK-NEXT: adc.w r2, r2, r12 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r3 ; CHECK-NEXT: vmov q2[3], q2[1], r0, r3 -; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.u16 r3, q0[2] -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: sxth r3, r3 +; CHECK-NEXT: vmov.s16 r0, q0[3] +; CHECK-NEXT: vmov.s16 r3, q0[2] ; CHECK-NEXT: vmov q3[2], q3[0], r3, r0 ; CHECK-NEXT: asrs r0, r0, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -369,10 +365,8 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r3 ; CHECK-NEXT: vmov q1[3], q1[1], r0, r3 -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.u16 r3, q0[4] -; CHECK-NEXT: sxth r0, r0 -; CHECK-NEXT: sxth r3, r3 +; CHECK-NEXT: vmov.s16 r0, q0[5] +; CHECK-NEXT: vmov.s16 r3, q0[4] ; CHECK-NEXT: vmov q2[2], q2[0], r3, r0 ; CHECK-NEXT: asrs r0, r0, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -392,10 +386,8 @@ ; CHECK-NEXT: adc.w r1, r1, r12 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r3 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r3 -; CHECK-NEXT: vmov.u16 r2, q0[7] -; CHECK-NEXT: vmov.u16 r3, q0[6] -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: sxth r3, r3 +; CHECK-NEXT: vmov.s16 r2, q0[7] +; CHECK-NEXT: vmov.s16 r3, q0[6] ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 ; CHECK-NEXT: asrs r2, r2, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -1050,10 +1042,8 @@ ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r1 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r1 -; CHECK-NEXT: vmov.u8 r1, q0[1] -; CHECK-NEXT: vmov.u8 r2, q0[0] -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r1, q0[1] +; CHECK-NEXT: vmov.s8 r2, q0[0] ; CHECK-NEXT: vmov q6[2], q6[0], r2, r1 ; CHECK-NEXT: asrs r1, r1, #31 ; CHECK-NEXT: asrs r2, r2, #31 @@ -1071,10 +1061,8 @@ ; CHECK-NEXT: adc.w r2, r2, r12 ; CHECK-NEXT: vmov q5[2], q5[0], r0, r3 ; CHECK-NEXT: vmov q5[3], q5[1], r0, r3 -; CHECK-NEXT: vmov.u8 r0, q0[3] -; CHECK-NEXT: vmov.u8 r3, q0[2] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r0, q0[3] +; CHECK-NEXT: vmov.s8 r3, q0[2] ; CHECK-NEXT: vmov q6[2], q6[0], r3, r0 ; CHECK-NEXT: asrs r0, r0, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -1102,10 +1090,8 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q4[2], q4[0], r0, r3 ; CHECK-NEXT: vmov q4[3], q4[1], r0, r3 -; CHECK-NEXT: vmov.u8 r0, q0[5] -; CHECK-NEXT: vmov.u8 r3, q0[4] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r0, q0[5] +; CHECK-NEXT: vmov.s8 r3, q0[4] ; CHECK-NEXT: vmov q5[2], q5[0], r3, r0 ; CHECK-NEXT: asrs r0, r0, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -1125,10 +1111,8 @@ ; CHECK-NEXT: adc.w r1, r1, r12 ; CHECK-NEXT: vmov q4[2], q4[0], r2, r3 ; CHECK-NEXT: vmov q4[3], q4[1], r2, r3 -; CHECK-NEXT: vmov.u8 r2, q0[7] -; CHECK-NEXT: vmov.u8 r3, q0[6] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r2, q0[7] +; CHECK-NEXT: vmov.s8 r3, q0[6] ; CHECK-NEXT: vmov q5[2], q5[0], r3, r2 ; CHECK-NEXT: asrs r2, r2, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -1174,10 +1158,8 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r3 ; CHECK-NEXT: vmov q2[3], q2[1], r0, r3 -; CHECK-NEXT: vmov.u8 r0, q0[9] -; CHECK-NEXT: vmov.u8 r3, q0[8] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r0, q0[9] +; CHECK-NEXT: vmov.s8 r3, q0[8] ; CHECK-NEXT: vmov q3[2], q3[0], r3, r0 ; CHECK-NEXT: asrs r0, r0, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -1197,10 +1179,8 @@ ; CHECK-NEXT: adc.w r1, r1, r12 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r3 -; CHECK-NEXT: vmov.u8 r2, q0[11] -; CHECK-NEXT: vmov.u8 r3, q0[10] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r2, q0[11] +; CHECK-NEXT: vmov.s8 r3, q0[10] ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2 ; CHECK-NEXT: asrs r2, r2, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -1228,10 +1208,8 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q1[2], q1[0], r0, r3 ; CHECK-NEXT: vmov q1[3], q1[1], r0, r3 -; CHECK-NEXT: vmov.u8 r0, q0[13] -; CHECK-NEXT: vmov.u8 r3, q0[12] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r0, q0[13] +; CHECK-NEXT: vmov.s8 r3, q0[12] ; CHECK-NEXT: vmov q2[2], q2[0], r3, r0 ; CHECK-NEXT: asrs r0, r0, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -1251,10 +1229,8 @@ ; CHECK-NEXT: adc.w r1, r1, r12 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r3 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r3 -; CHECK-NEXT: vmov.u8 r2, q0[15] -; CHECK-NEXT: vmov.u8 r3, q0[14] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r2, q0[15] +; CHECK-NEXT: vmov.s8 r3, q0[14] ; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 ; CHECK-NEXT: asrs r2, r2, #31 ; CHECK-NEXT: asrs r3, r3, #31 @@ -1988,39 +1964,35 @@ ; CHECK-NEXT: vmov.u16 r2, q1[3] ; CHECK-NEXT: vmov.u16 r3, q1[1] ; CHECK-NEXT: vmov q2[3], q2[1], r3, r2 +; CHECK-NEXT: vmov.s16 r2, q0[0] ; CHECK-NEXT: vcmp.i32 ne, q2, zr -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r2, r12, #1 -; CHECK-NEXT: ubfx r3, r12, #4, #1 -; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: vmrs lr, p0 +; CHECK-NEXT: ubfx r3, lr, #4, #1 +; CHECK-NEXT: rsb.w r12, r3, #0 +; CHECK-NEXT: and r3, lr, #1 ; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov q2[2], q2[0], r2, r3 -; CHECK-NEXT: vmov q2[3], q2[1], r2, r3 -; CHECK-NEXT: vmov.u16 r2, q0[1] -; CHECK-NEXT: vmov.u16 r3, q0[0] -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: sxth r3, r3 -; CHECK-NEXT: vmov q3[2], q3[0], r3, r2 -; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov q2[2], q2[0], r3, r12 +; CHECK-NEXT: vmov q2[3], q2[1], r3, r12 +; CHECK-NEXT: vmov.s16 r3, q0[1] +; CHECK-NEXT: vmov q3[2], q3[0], r2, r3 ; CHECK-NEXT: asrs r3, r3, #31 -; CHECK-NEXT: vmov q3[3], q3[1], r3, r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov q3[3], q3[1], r2, r3 ; CHECK-NEXT: vand q2, q3, q2 ; CHECK-NEXT: vmov r2, s10 ; CHECK-NEXT: vmov r4, s8 -; CHECK-NEXT: vmov lr, s11 +; CHECK-NEXT: vmov r12, s11 ; CHECK-NEXT: vmov r3, s9 ; CHECK-NEXT: adds r5, r4, r2 -; CHECK-NEXT: ubfx r4, r12, #12, #1 -; CHECK-NEXT: ubfx r2, r12, #8, #1 +; CHECK-NEXT: ubfx r4, lr, #12, #1 +; CHECK-NEXT: ubfx r2, lr, #8, #1 ; CHECK-NEXT: rsb.w r4, r4, #0 ; CHECK-NEXT: rsb.w r2, r2, #0 -; CHECK-NEXT: adc.w r3, r3, lr +; CHECK-NEXT: adc.w r3, r3, r12 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r4 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r4 -; CHECK-NEXT: vmov.u16 r2, q0[3] -; CHECK-NEXT: vmov.u16 r4, q0[2] -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: sxth r4, r4 +; CHECK-NEXT: vmov.s16 r2, q0[3] +; CHECK-NEXT: vmov.s16 r4, q0[2] ; CHECK-NEXT: vmov q3[2], q3[0], r4, r2 ; CHECK-NEXT: asrs r2, r2, #31 ; CHECK-NEXT: asrs r4, r4, #31 @@ -2048,10 +2020,8 @@ ; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r4 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r4 -; CHECK-NEXT: vmov.u16 r2, q0[5] -; CHECK-NEXT: vmov.u16 r4, q0[4] -; CHECK-NEXT: sxth r2, r2 -; CHECK-NEXT: sxth r4, r4 +; CHECK-NEXT: vmov.s16 r2, q0[5] +; CHECK-NEXT: vmov.s16 r4, q0[4] ; CHECK-NEXT: vmov q2[2], q2[0], r4, r2 ; CHECK-NEXT: asrs r2, r2, #31 ; CHECK-NEXT: asrs r4, r4, #31 @@ -2071,10 +2041,8 @@ ; CHECK-NEXT: adc.w r3, r3, r12 ; CHECK-NEXT: vmov q1[2], q1[0], r5, r4 ; CHECK-NEXT: vmov q1[3], q1[1], r5, r4 -; CHECK-NEXT: vmov.u16 r5, q0[7] -; CHECK-NEXT: vmov.u16 r4, q0[6] -; CHECK-NEXT: sxth r5, r5 -; CHECK-NEXT: sxth r4, r4 +; CHECK-NEXT: vmov.s16 r5, q0[7] +; CHECK-NEXT: vmov.s16 r4, q0[6] ; CHECK-NEXT: vmov q0[2], q0[0], r4, r5 ; CHECK-NEXT: asrs r5, r5, #31 ; CHECK-NEXT: asrs r4, r4, #31 @@ -2611,39 +2579,35 @@ ; CHECK-NEXT: vmov.u16 r2, q4[3] ; CHECK-NEXT: vmov.u16 r3, q4[1] ; CHECK-NEXT: vmov q5[3], q5[1], r3, r2 +; CHECK-NEXT: vmov.s8 r2, q0[0] ; CHECK-NEXT: vcmp.i32 ne, q5, zr -; CHECK-NEXT: vmrs r12, p0 -; CHECK-NEXT: and r2, r12, #1 -; CHECK-NEXT: ubfx r3, r12, #4, #1 -; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: vmrs lr, p0 +; CHECK-NEXT: ubfx r3, lr, #4, #1 +; CHECK-NEXT: rsb.w r12, r3, #0 +; CHECK-NEXT: and r3, lr, #1 ; CHECK-NEXT: rsbs r3, r3, #0 -; CHECK-NEXT: vmov q5[2], q5[0], r2, r3 -; CHECK-NEXT: vmov q5[3], q5[1], r2, r3 -; CHECK-NEXT: vmov.u8 r2, q0[1] -; CHECK-NEXT: vmov.u8 r3, q0[0] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 -; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov q5[2], q5[0], r3, r12 +; CHECK-NEXT: vmov q5[3], q5[1], r3, r12 +; CHECK-NEXT: vmov.s8 r3, q0[1] +; CHECK-NEXT: vmov q6[2], q6[0], r2, r3 ; CHECK-NEXT: asrs r3, r3, #31 -; CHECK-NEXT: vmov q6[3], q6[1], r3, r2 +; CHECK-NEXT: asrs r2, r2, #31 +; CHECK-NEXT: vmov q6[3], q6[1], r2, r3 ; CHECK-NEXT: vand q5, q6, q5 ; CHECK-NEXT: vmov r2, s22 ; CHECK-NEXT: vmov r4, s20 -; CHECK-NEXT: vmov lr, s23 +; CHECK-NEXT: vmov r12, s23 ; CHECK-NEXT: vmov r3, s21 ; CHECK-NEXT: adds r5, r4, r2 -; CHECK-NEXT: ubfx r4, r12, #12, #1 -; CHECK-NEXT: ubfx r2, r12, #8, #1 +; CHECK-NEXT: ubfx r4, lr, #12, #1 +; CHECK-NEXT: ubfx r2, lr, #8, #1 ; CHECK-NEXT: rsb.w r4, r4, #0 ; CHECK-NEXT: rsb.w r2, r2, #0 -; CHECK-NEXT: adc.w r3, r3, lr +; CHECK-NEXT: adc.w r3, r3, r12 ; CHECK-NEXT: vmov q5[2], q5[0], r2, r4 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r4 -; CHECK-NEXT: vmov.u8 r2, q0[3] -; CHECK-NEXT: vmov.u8 r4, q0[2] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r2, q0[3] +; CHECK-NEXT: vmov.s8 r4, q0[2] ; CHECK-NEXT: vmov q6[2], q6[0], r4, r2 ; CHECK-NEXT: asrs r2, r2, #31 ; CHECK-NEXT: asrs r4, r4, #31 @@ -2671,10 +2635,8 @@ ; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: vmov q4[2], q4[0], r2, r4 ; CHECK-NEXT: vmov q4[3], q4[1], r2, r4 -; CHECK-NEXT: vmov.u8 r2, q0[5] -; CHECK-NEXT: vmov.u8 r4, q0[4] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r2, q0[5] +; CHECK-NEXT: vmov.s8 r4, q0[4] ; CHECK-NEXT: vmov q5[2], q5[0], r4, r2 ; CHECK-NEXT: asrs r2, r2, #31 ; CHECK-NEXT: asrs r4, r4, #31 @@ -2694,10 +2656,8 @@ ; CHECK-NEXT: adc.w r3, r3, r12 ; CHECK-NEXT: vmov q4[2], q4[0], r5, r4 ; CHECK-NEXT: vmov q4[3], q4[1], r5, r4 -; CHECK-NEXT: vmov.u8 r5, q0[7] -; CHECK-NEXT: vmov.u8 r4, q0[6] -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r5, q0[7] +; CHECK-NEXT: vmov.s8 r4, q0[6] ; CHECK-NEXT: vmov q5[2], q5[0], r4, r5 ; CHECK-NEXT: asrs r5, r5, #31 ; CHECK-NEXT: asrs r4, r4, #31 @@ -2743,10 +2703,8 @@ ; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r4 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r4 -; CHECK-NEXT: vmov.u8 r2, q0[9] -; CHECK-NEXT: vmov.u8 r4, q0[8] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r2, q0[9] +; CHECK-NEXT: vmov.s8 r4, q0[8] ; CHECK-NEXT: vmov q3[2], q3[0], r4, r2 ; CHECK-NEXT: asrs r2, r2, #31 ; CHECK-NEXT: asrs r4, r4, #31 @@ -2766,10 +2724,8 @@ ; CHECK-NEXT: adc.w r3, r3, r12 ; CHECK-NEXT: vmov q2[2], q2[0], r5, r4 ; CHECK-NEXT: vmov q2[3], q2[1], r5, r4 -; CHECK-NEXT: vmov.u8 r5, q0[11] -; CHECK-NEXT: vmov.u8 r4, q0[10] -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r5, q0[11] +; CHECK-NEXT: vmov.s8 r4, q0[10] ; CHECK-NEXT: vmov q3[2], q3[0], r4, r5 ; CHECK-NEXT: asrs r5, r5, #31 ; CHECK-NEXT: asrs r4, r4, #31 @@ -2797,10 +2753,8 @@ ; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: vmov q1[2], q1[0], r2, r4 ; CHECK-NEXT: vmov q1[3], q1[1], r2, r4 -; CHECK-NEXT: vmov.u8 r2, q0[13] -; CHECK-NEXT: vmov.u8 r4, q0[12] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r2, q0[13] +; CHECK-NEXT: vmov.s8 r4, q0[12] ; CHECK-NEXT: vmov q2[2], q2[0], r4, r2 ; CHECK-NEXT: asrs r2, r2, #31 ; CHECK-NEXT: asrs r4, r4, #31 @@ -2820,10 +2774,8 @@ ; CHECK-NEXT: adc.w r3, r3, r12 ; CHECK-NEXT: vmov q1[2], q1[0], r5, r4 ; CHECK-NEXT: vmov q1[3], q1[1], r5, r4 -; CHECK-NEXT: vmov.u8 r5, q0[15] -; CHECK-NEXT: vmov.u8 r4, q0[14] -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r5, q0[15] +; CHECK-NEXT: vmov.s8 r4, q0[14] ; CHECK-NEXT: vmov q0[2], q0[0], r4, r5 ; CHECK-NEXT: asrs r5, r5, #31 ; CHECK-NEXT: asrs r4, r4, #31 diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mla.ll @@ -737,14 +737,10 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r7, lr} ; CHECK-NEXT: push {r7, lr} -; CHECK-NEXT: vmov.u8 r0, q1[1] -; CHECK-NEXT: vmov.u8 r1, q0[1] -; CHECK-NEXT: vmov.u8 r2, q1[0] -; CHECK-NEXT: vmov.u8 r3, q0[0] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r0, q1[1] +; CHECK-NEXT: vmov.s8 r1, q0[1] +; CHECK-NEXT: vmov.s8 r2, q1[0] +; CHECK-NEXT: vmov.s8 r3, q0[0] ; CHECK-NEXT: smull r0, r1, r1, r0 ; CHECK-NEXT: smull r2, r3, r3, r2 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r0 @@ -753,15 +749,11 @@ ; CHECK-NEXT: vmov r3, s8 ; CHECK-NEXT: vmov r0, s9 ; CHECK-NEXT: adds.w lr, r3, r2 -; CHECK-NEXT: vmov.u8 r3, q0[3] +; CHECK-NEXT: vmov.s8 r3, q0[3] ; CHECK-NEXT: adc.w r12, r0, r1 -; CHECK-NEXT: vmov.u8 r1, q1[3] -; CHECK-NEXT: vmov.u8 r0, q1[2] -; CHECK-NEXT: vmov.u8 r2, q0[2] -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r1, q1[3] +; CHECK-NEXT: vmov.s8 r0, q1[2] +; CHECK-NEXT: vmov.s8 r2, q0[2] ; CHECK-NEXT: smull r1, r3, r3, r1 ; CHECK-NEXT: smull r0, r2, r2, r0 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r1 @@ -772,15 +764,11 @@ ; CHECK-NEXT: adds.w r1, r1, lr ; CHECK-NEXT: adc.w r0, r0, r12 ; CHECK-NEXT: adds.w lr, r1, r2 -; CHECK-NEXT: vmov.u8 r2, q1[5] +; CHECK-NEXT: vmov.s8 r2, q1[5] ; CHECK-NEXT: adc.w r12, r0, r3 -; CHECK-NEXT: vmov.u8 r3, q0[5] -; CHECK-NEXT: vmov.u8 r0, q1[4] -; CHECK-NEXT: vmov.u8 r1, q0[4] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r1, r1 +; CHECK-NEXT: vmov.s8 r3, q0[5] +; CHECK-NEXT: vmov.s8 r0, q1[4] +; CHECK-NEXT: vmov.s8 r1, q0[4] ; CHECK-NEXT: smull r2, r3, r3, r2 ; CHECK-NEXT: smull r0, r1, r1, r0 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r2 @@ -791,15 +779,11 @@ ; CHECK-NEXT: adds.w r1, r1, lr ; CHECK-NEXT: adc.w r0, r0, r12 ; CHECK-NEXT: adds.w lr, r1, r2 -; CHECK-NEXT: vmov.u8 r2, q1[7] +; CHECK-NEXT: vmov.s8 r2, q1[7] ; CHECK-NEXT: adc.w r12, r0, r3 -; CHECK-NEXT: vmov.u8 r3, q0[7] -; CHECK-NEXT: vmov.u8 r0, q1[6] -; CHECK-NEXT: vmov.u8 r1, q0[6] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r1, r1 +; CHECK-NEXT: vmov.s8 r3, q0[7] +; CHECK-NEXT: vmov.s8 r0, q1[6] +; CHECK-NEXT: vmov.s8 r1, q0[6] ; CHECK-NEXT: smull r2, r3, r3, r2 ; CHECK-NEXT: smull r0, r1, r1, r0 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r2 @@ -810,15 +794,11 @@ ; CHECK-NEXT: adds.w r1, r1, lr ; CHECK-NEXT: adc.w r0, r0, r12 ; CHECK-NEXT: adds.w lr, r1, r2 -; CHECK-NEXT: vmov.u8 r2, q1[9] +; CHECK-NEXT: vmov.s8 r2, q1[9] ; CHECK-NEXT: adc.w r12, r0, r3 -; CHECK-NEXT: vmov.u8 r3, q0[9] -; CHECK-NEXT: vmov.u8 r0, q1[8] -; CHECK-NEXT: vmov.u8 r1, q0[8] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r1, r1 +; CHECK-NEXT: vmov.s8 r3, q0[9] +; CHECK-NEXT: vmov.s8 r0, q1[8] +; CHECK-NEXT: vmov.s8 r1, q0[8] ; CHECK-NEXT: smull r2, r3, r3, r2 ; CHECK-NEXT: smull r0, r1, r1, r0 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r2 @@ -829,15 +809,11 @@ ; CHECK-NEXT: adds.w r1, r1, lr ; CHECK-NEXT: adc.w r0, r0, r12 ; CHECK-NEXT: adds.w lr, r1, r2 -; CHECK-NEXT: vmov.u8 r2, q1[11] +; CHECK-NEXT: vmov.s8 r2, q1[11] ; CHECK-NEXT: adc.w r12, r0, r3 -; CHECK-NEXT: vmov.u8 r3, q0[11] -; CHECK-NEXT: vmov.u8 r0, q1[10] -; CHECK-NEXT: vmov.u8 r1, q0[10] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r1, r1 +; CHECK-NEXT: vmov.s8 r3, q0[11] +; CHECK-NEXT: vmov.s8 r0, q1[10] +; CHECK-NEXT: vmov.s8 r1, q0[10] ; CHECK-NEXT: smull r2, r3, r3, r2 ; CHECK-NEXT: smull r0, r1, r1, r0 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r2 @@ -848,15 +824,11 @@ ; CHECK-NEXT: adds.w r1, r1, lr ; CHECK-NEXT: adc.w r0, r0, r12 ; CHECK-NEXT: adds.w lr, r1, r2 -; CHECK-NEXT: vmov.u8 r2, q1[13] +; CHECK-NEXT: vmov.s8 r2, q1[13] ; CHECK-NEXT: adc.w r12, r0, r3 -; CHECK-NEXT: vmov.u8 r3, q0[13] -; CHECK-NEXT: vmov.u8 r0, q1[12] -; CHECK-NEXT: vmov.u8 r1, q0[12] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r1, r1 +; CHECK-NEXT: vmov.s8 r3, q0[13] +; CHECK-NEXT: vmov.s8 r0, q1[12] +; CHECK-NEXT: vmov.s8 r1, q0[12] ; CHECK-NEXT: smull r2, r3, r3, r2 ; CHECK-NEXT: smull r0, r1, r1, r0 ; CHECK-NEXT: vmov q2[2], q2[0], r0, r2 @@ -868,15 +840,11 @@ ; CHECK-NEXT: vmov r0, s10 ; CHECK-NEXT: adds r0, r0, r1 ; CHECK-NEXT: adc.w r1, r2, r3 -; CHECK-NEXT: vmov.u8 r2, q1[14] -; CHECK-NEXT: vmov.u8 r3, q0[14] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r2, q1[14] +; CHECK-NEXT: vmov.s8 r3, q0[14] ; CHECK-NEXT: smlal r0, r1, r3, r2 -; CHECK-NEXT: vmov.u8 r2, q1[15] -; CHECK-NEXT: vmov.u8 r3, q0[15] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r2, q1[15] +; CHECK-NEXT: vmov.s8 r3, q0[15] ; CHECK-NEXT: smlal r0, r1, r3, r2 ; CHECK-NEXT: pop {r7, pc} entry: @@ -1690,20 +1658,14 @@ ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .save {r4, r5, r7, lr} ; CHECK-NEXT: push {r4, r5, r7, lr} -; CHECK-NEXT: vmov.u8 r2, q1[1] -; CHECK-NEXT: vmov.u8 r3, q0[1] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r2, q1[1] +; CHECK-NEXT: vmov.s8 r3, q0[1] ; CHECK-NEXT: smull r12, r3, r3, r2 -; CHECK-NEXT: vmov.u8 r2, q1[0] -; CHECK-NEXT: sxtb.w lr, r2 -; CHECK-NEXT: vmov.u8 r2, q0[0] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: vmov.u8 r4, q1[2] +; CHECK-NEXT: vmov.s8 lr, q1[0] +; CHECK-NEXT: vmov.s8 r2, q0[0] +; CHECK-NEXT: vmov.s8 r4, q1[2] +; CHECK-NEXT: vmov.s8 r5, q0[2] ; CHECK-NEXT: smull r2, lr, r2, lr -; CHECK-NEXT: vmov.u8 r5, q0[2] -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb r5, r5 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r12 ; CHECK-NEXT: smull r4, r5, r5, r4 ; CHECK-NEXT: vmov q2[3], q2[1], lr, r3 @@ -1711,11 +1673,9 @@ ; CHECK-NEXT: vmov r2, s8 ; CHECK-NEXT: vmov r12, s9 ; CHECK-NEXT: adds.w lr, lr, r2 -; CHECK-NEXT: vmov.u8 r2, q1[3] +; CHECK-NEXT: vmov.s8 r2, q1[3] ; CHECK-NEXT: adc.w r12, r12, r3 -; CHECK-NEXT: vmov.u8 r3, q0[3] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r3, q0[3] ; CHECK-NEXT: smull r2, r3, r3, r2 ; CHECK-NEXT: vmov q2[2], q2[0], r4, r2 ; CHECK-NEXT: vmov q2[3], q2[1], r5, r3 @@ -1725,15 +1685,11 @@ ; CHECK-NEXT: vmov r4, s10 ; CHECK-NEXT: adc.w r2, r2, r12 ; CHECK-NEXT: adds.w lr, r5, r4 -; CHECK-NEXT: vmov.u8 r4, q0[5] +; CHECK-NEXT: vmov.s8 r4, q0[5] ; CHECK-NEXT: adc.w r12, r2, r3 -; CHECK-NEXT: vmov.u8 r3, q1[5] -; CHECK-NEXT: vmov.u8 r2, q1[4] -; CHECK-NEXT: vmov.u8 r5, q0[4] -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r5, r5 +; CHECK-NEXT: vmov.s8 r3, q1[5] +; CHECK-NEXT: vmov.s8 r2, q1[4] +; CHECK-NEXT: vmov.s8 r5, q0[4] ; CHECK-NEXT: smull r3, r4, r4, r3 ; CHECK-NEXT: smull r2, r5, r5, r2 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3 @@ -1744,15 +1700,11 @@ ; CHECK-NEXT: adds.w r3, r3, lr ; CHECK-NEXT: adc.w r2, r2, r12 ; CHECK-NEXT: adds.w lr, r3, r5 -; CHECK-NEXT: vmov.u8 r5, q1[7] +; CHECK-NEXT: vmov.s8 r5, q1[7] ; CHECK-NEXT: adc.w r12, r2, r4 -; CHECK-NEXT: vmov.u8 r4, q0[7] -; CHECK-NEXT: vmov.u8 r2, q1[6] -; CHECK-NEXT: vmov.u8 r3, q0[6] -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r4, q0[7] +; CHECK-NEXT: vmov.s8 r2, q1[6] +; CHECK-NEXT: vmov.s8 r3, q0[6] ; CHECK-NEXT: smull r5, r4, r4, r5 ; CHECK-NEXT: smull r2, r3, r3, r2 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r5 @@ -1763,15 +1715,11 @@ ; CHECK-NEXT: adds.w r3, r3, lr ; CHECK-NEXT: adc.w r2, r2, r12 ; CHECK-NEXT: adds.w lr, r3, r5 -; CHECK-NEXT: vmov.u8 r5, q1[9] +; CHECK-NEXT: vmov.s8 r5, q1[9] ; CHECK-NEXT: adc.w r12, r2, r4 -; CHECK-NEXT: vmov.u8 r4, q0[9] -; CHECK-NEXT: vmov.u8 r2, q1[8] -; CHECK-NEXT: vmov.u8 r3, q0[8] -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r4, q0[9] +; CHECK-NEXT: vmov.s8 r2, q1[8] +; CHECK-NEXT: vmov.s8 r3, q0[8] ; CHECK-NEXT: smull r5, r4, r4, r5 ; CHECK-NEXT: smull r2, r3, r3, r2 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r5 @@ -1782,15 +1730,11 @@ ; CHECK-NEXT: adds.w r3, r3, lr ; CHECK-NEXT: adc.w r2, r2, r12 ; CHECK-NEXT: adds.w lr, r3, r5 -; CHECK-NEXT: vmov.u8 r5, q1[11] +; CHECK-NEXT: vmov.s8 r5, q1[11] ; CHECK-NEXT: adc.w r12, r2, r4 -; CHECK-NEXT: vmov.u8 r4, q0[11] -; CHECK-NEXT: vmov.u8 r2, q1[10] -; CHECK-NEXT: vmov.u8 r3, q0[10] -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r4, q0[11] +; CHECK-NEXT: vmov.s8 r2, q1[10] +; CHECK-NEXT: vmov.s8 r3, q0[10] ; CHECK-NEXT: smull r5, r4, r4, r5 ; CHECK-NEXT: smull r2, r3, r3, r2 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r5 @@ -1801,15 +1745,11 @@ ; CHECK-NEXT: adds.w r3, r3, lr ; CHECK-NEXT: adc.w r2, r2, r12 ; CHECK-NEXT: adds.w lr, r3, r5 -; CHECK-NEXT: vmov.u8 r5, q1[13] +; CHECK-NEXT: vmov.s8 r5, q1[13] ; CHECK-NEXT: adc.w r12, r2, r4 -; CHECK-NEXT: vmov.u8 r4, q0[13] -; CHECK-NEXT: vmov.u8 r2, q1[12] -; CHECK-NEXT: vmov.u8 r3, q0[12] -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r4, q0[13] +; CHECK-NEXT: vmov.s8 r2, q1[12] +; CHECK-NEXT: vmov.s8 r3, q0[12] ; CHECK-NEXT: smull r5, r4, r4, r5 ; CHECK-NEXT: smull r2, r3, r3, r2 ; CHECK-NEXT: vmov q2[2], q2[0], r2, r5 @@ -1820,16 +1760,12 @@ ; CHECK-NEXT: adds.w r3, r3, lr ; CHECK-NEXT: adc.w r2, r2, r12 ; CHECK-NEXT: adds r3, r3, r5 -; CHECK-NEXT: vmov.u8 r5, q1[14] +; CHECK-NEXT: vmov.s8 r5, q1[14] ; CHECK-NEXT: adcs r2, r4 -; CHECK-NEXT: vmov.u8 r4, q0[14] -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r4, q0[14] ; CHECK-NEXT: smlal r3, r2, r4, r5 -; CHECK-NEXT: vmov.u8 r5, q1[15] -; CHECK-NEXT: vmov.u8 r4, q0[15] -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r5, q1[15] +; CHECK-NEXT: vmov.s8 r4, q0[15] ; CHECK-NEXT: smlal r3, r2, r4, r5 ; CHECK-NEXT: adds r0, r0, r3 ; CHECK-NEXT: adcs r1, r2 diff --git a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll --- a/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vecreduce-mlapred.ll @@ -1127,11 +1127,10 @@ ; CHECK-NEXT: vcmp.i8 eq, q2, zr ; CHECK-NEXT: vmov.i8 q2, #0x0 ; CHECK-NEXT: vmov.i8 q3, #0xff -; CHECK-NEXT: vmov.u8 r3, q1[0] +; CHECK-NEXT: vmov.s8 r3, q1[0] ; CHECK-NEXT: vpsel q4, q3, q2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r4, q0[4] ; CHECK-NEXT: vmov.u8 r0, q4[0] -; CHECK-NEXT: vmov.u8 r4, q0[4] ; CHECK-NEXT: vmov.16 q5[0], r0 ; CHECK-NEXT: vmov.u8 r0, q4[1] ; CHECK-NEXT: vmov.16 q5[1], r0 @@ -1147,7 +1146,6 @@ ; CHECK-NEXT: vmov.16 q5[6], r0 ; CHECK-NEXT: vmov.u8 r0, q4[7] ; CHECK-NEXT: vmov.16 q5[7], r0 -; CHECK-NEXT: sxtb r4, r4 ; CHECK-NEXT: vcmp.i16 ne, q5, zr ; CHECK-NEXT: vpsel q5, q3, q2 ; CHECK-NEXT: vmov.u16 r0, q5[2] @@ -1164,13 +1162,10 @@ ; CHECK-NEXT: rsbs r1, r1, #0 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r1 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r1 -; CHECK-NEXT: vmov.u8 r1, q1[1] -; CHECK-NEXT: vmov.u8 r2, q0[1] -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r1, q1[1] +; CHECK-NEXT: vmov.s8 r2, q0[1] ; CHECK-NEXT: smull r1, r12, r2, r1 -; CHECK-NEXT: vmov.u8 r2, q0[0] -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r2, q0[0] ; CHECK-NEXT: smull r2, r3, r2, r3 ; CHECK-NEXT: vmov q7[2], q7[0], r2, r1 ; CHECK-NEXT: vmov q7[3], q7[1], r3, r12 @@ -1184,17 +1179,13 @@ ; CHECK-NEXT: ubfx r0, r0, #8, #1 ; CHECK-NEXT: rsb.w r3, r3, #0 ; CHECK-NEXT: rsb.w r0, r0, #0 -; CHECK-NEXT: vmov.u8 r1, q1[2] -; CHECK-NEXT: vmov q6[2], q6[0], r0, r3 ; CHECK-NEXT: adc.w r12, r12, r2 +; CHECK-NEXT: vmov q6[2], q6[0], r0, r3 +; CHECK-NEXT: vmov.s8 r1, q1[2] ; CHECK-NEXT: vmov q6[3], q6[1], r0, r3 -; CHECK-NEXT: vmov.u8 r0, q1[3] -; CHECK-NEXT: vmov.u8 r3, q0[3] -; CHECK-NEXT: vmov.u8 r2, q0[2] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r1, r1 -; CHECK-NEXT: sxtb r2, r2 +; CHECK-NEXT: vmov.s8 r2, q0[2] +; CHECK-NEXT: vmov.s8 r0, q1[3] +; CHECK-NEXT: vmov.s8 r3, q0[3] ; CHECK-NEXT: smull r0, r3, r3, r0 ; CHECK-NEXT: smull r1, r2, r2, r1 ; CHECK-NEXT: vmov q7[2], q7[0], r1, r0 @@ -1207,15 +1198,14 @@ ; CHECK-NEXT: adc.w r2, r12, r0 ; CHECK-NEXT: vmov r0, s26 ; CHECK-NEXT: adds.w r12, r1, r0 -; CHECK-NEXT: vmov.u8 r1, q1[4] +; CHECK-NEXT: vmov.s8 r1, q1[4] ; CHECK-NEXT: adc.w lr, r2, r3 ; CHECK-NEXT: vmov.u16 r2, q5[6] ; CHECK-NEXT: vmov.u16 r3, q5[4] -; CHECK-NEXT: sxtb r1, r1 +; CHECK-NEXT: smull r1, r4, r4, r1 ; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 ; CHECK-NEXT: vmov.u16 r2, q5[7] ; CHECK-NEXT: vmov.u16 r3, q5[5] -; CHECK-NEXT: smull r1, r4, r4, r1 ; CHECK-NEXT: vmov q6[3], q6[1], r3, r2 ; CHECK-NEXT: vcmp.i32 ne, q6, zr ; CHECK-NEXT: vmrs r2, p0 @@ -1225,10 +1215,8 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q5[2], q5[0], r0, r3 ; CHECK-NEXT: vmov q5[3], q5[1], r0, r3 -; CHECK-NEXT: vmov.u8 r0, q1[5] -; CHECK-NEXT: vmov.u8 r3, q0[5] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r0, q1[5] +; CHECK-NEXT: vmov.s8 r3, q0[5] ; CHECK-NEXT: smull r0, r3, r3, r0 ; CHECK-NEXT: vmov q6[2], q6[0], r1, r0 ; CHECK-NEXT: vmov q6[3], q6[1], r4, r3 @@ -1240,23 +1228,19 @@ ; CHECK-NEXT: adds.w r1, r1, r12 ; CHECK-NEXT: adc.w r0, r0, lr ; CHECK-NEXT: adds r1, r1, r4 -; CHECK-NEXT: vmov.u8 r4, q1[6] +; CHECK-NEXT: vmov.s8 r4, q1[6] ; CHECK-NEXT: adc.w r12, r0, r3 ; CHECK-NEXT: ubfx r3, r2, #12, #1 ; CHECK-NEXT: ubfx r2, r2, #8, #1 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmov.u8 r0, q0[6] +; CHECK-NEXT: vmov.s8 r0, q0[6] ; CHECK-NEXT: vmov q5[2], q5[0], r2, r3 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: smull r0, r4, r0, r4 ; CHECK-NEXT: vmov q5[3], q5[1], r2, r3 -; CHECK-NEXT: vmov.u8 r2, q1[7] -; CHECK-NEXT: vmov.u8 r3, q0[7] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r2, q1[7] +; CHECK-NEXT: vmov.s8 r3, q0[7] ; CHECK-NEXT: smull r2, r3, r3, r2 -; CHECK-NEXT: smull r0, r4, r0, r4 ; CHECK-NEXT: vmov q6[2], q6[0], r0, r2 ; CHECK-NEXT: vmov q6[3], q6[1], r4, r3 ; CHECK-NEXT: vand q5, q6, q5 @@ -1267,7 +1251,7 @@ ; CHECK-NEXT: adc.w r2, r12, r0 ; CHECK-NEXT: vmov r0, s22 ; CHECK-NEXT: adds.w r12, r1, r0 -; CHECK-NEXT: vmov.u8 r0, q1[8] +; CHECK-NEXT: vmov.s8 r0, q1[8] ; CHECK-NEXT: adc.w lr, r2, r3 ; CHECK-NEXT: vmov.u8 r2, q4[8] ; CHECK-NEXT: vmov.16 q5[0], r2 @@ -1285,17 +1269,15 @@ ; CHECK-NEXT: vmov.16 q5[6], r2 ; CHECK-NEXT: vmov.u8 r2, q4[15] ; CHECK-NEXT: vmov.16 q5[7], r2 -; CHECK-NEXT: vmov.u8 r1, q0[8] +; CHECK-NEXT: vmov.s8 r1, q0[8] ; CHECK-NEXT: vcmp.i16 ne, q5, zr -; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: smull r0, r1, r1, r0 ; CHECK-NEXT: vpsel q2, q3, q2 -; CHECK-NEXT: sxtb r1, r1 ; CHECK-NEXT: vmov.u16 r2, q2[2] ; CHECK-NEXT: vmov.u16 r3, q2[0] ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2 ; CHECK-NEXT: vmov.u16 r2, q2[3] ; CHECK-NEXT: vmov.u16 r3, q2[1] -; CHECK-NEXT: smull r0, r1, r1, r0 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2 ; CHECK-NEXT: vcmp.i32 ne, q3, zr ; CHECK-NEXT: vmrs r2, p0 @@ -1305,10 +1287,8 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q3[2], q3[0], r4, r3 ; CHECK-NEXT: vmov q3[3], q3[1], r4, r3 -; CHECK-NEXT: vmov.u8 r3, q1[9] -; CHECK-NEXT: vmov.u8 r4, q0[9] -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r3, q1[9] +; CHECK-NEXT: vmov.s8 r4, q0[9] ; CHECK-NEXT: smull r3, r4, r4, r3 ; CHECK-NEXT: vmov q4[2], q4[0], r0, r3 ; CHECK-NEXT: vmov q4[3], q4[1], r1, r4 @@ -1320,23 +1300,19 @@ ; CHECK-NEXT: adds.w r1, r1, r12 ; CHECK-NEXT: adc.w r0, r0, lr ; CHECK-NEXT: adds r1, r1, r4 -; CHECK-NEXT: vmov.u8 r4, q1[10] +; CHECK-NEXT: vmov.s8 r4, q1[10] ; CHECK-NEXT: adc.w r12, r0, r3 ; CHECK-NEXT: ubfx r3, r2, #12, #1 ; CHECK-NEXT: ubfx r2, r2, #8, #1 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmov.u8 r0, q0[10] +; CHECK-NEXT: vmov.s8 r0, q0[10] ; CHECK-NEXT: vmov q3[2], q3[0], r2, r3 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: smull r0, r4, r0, r4 ; CHECK-NEXT: vmov q3[3], q3[1], r2, r3 -; CHECK-NEXT: vmov.u8 r2, q1[11] -; CHECK-NEXT: vmov.u8 r3, q0[11] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r2, q1[11] +; CHECK-NEXT: vmov.s8 r3, q0[11] ; CHECK-NEXT: smull r2, r3, r3, r2 -; CHECK-NEXT: smull r0, r4, r0, r4 ; CHECK-NEXT: vmov q4[2], q4[0], r0, r2 ; CHECK-NEXT: vmov q4[3], q4[1], r4, r3 ; CHECK-NEXT: vand q3, q4, q3 @@ -1347,19 +1323,17 @@ ; CHECK-NEXT: adc.w r2, r12, r0 ; CHECK-NEXT: vmov r0, s14 ; CHECK-NEXT: adds.w r12, r1, r0 -; CHECK-NEXT: vmov.u8 r0, q1[12] +; CHECK-NEXT: vmov.s8 r0, q1[12] ; CHECK-NEXT: adc.w lr, r2, r3 ; CHECK-NEXT: vmov.u16 r2, q2[6] ; CHECK-NEXT: vmov.u16 r3, q2[4] -; CHECK-NEXT: vmov.u8 r1, q0[12] +; CHECK-NEXT: vmov.s8 r1, q0[12] ; CHECK-NEXT: vmov q3[2], q3[0], r3, r2 ; CHECK-NEXT: vmov.u16 r2, q2[7] ; CHECK-NEXT: vmov.u16 r3, q2[5] -; CHECK-NEXT: sxtb r0, r0 +; CHECK-NEXT: smull r0, r1, r1, r0 ; CHECK-NEXT: vmov q3[3], q3[1], r3, r2 -; CHECK-NEXT: sxtb r1, r1 ; CHECK-NEXT: vcmp.i32 ne, q3, zr -; CHECK-NEXT: smull r0, r1, r1, r0 ; CHECK-NEXT: vmrs r2, p0 ; CHECK-NEXT: and r4, r2, #1 ; CHECK-NEXT: ubfx r3, r2, #4, #1 @@ -1367,10 +1341,8 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q2[2], q2[0], r4, r3 ; CHECK-NEXT: vmov q2[3], q2[1], r4, r3 -; CHECK-NEXT: vmov.u8 r3, q1[13] -; CHECK-NEXT: vmov.u8 r4, q0[13] -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r3, q1[13] +; CHECK-NEXT: vmov.s8 r4, q0[13] ; CHECK-NEXT: smull r3, r4, r4, r3 ; CHECK-NEXT: vmov q3[2], q3[0], r0, r3 ; CHECK-NEXT: vmov q3[3], q3[1], r1, r4 @@ -1382,23 +1354,19 @@ ; CHECK-NEXT: adds.w r1, r1, r12 ; CHECK-NEXT: adc.w r0, r0, lr ; CHECK-NEXT: adds r1, r1, r4 -; CHECK-NEXT: vmov.u8 r4, q1[14] +; CHECK-NEXT: vmov.s8 r4, q1[14] ; CHECK-NEXT: adc.w r12, r0, r3 ; CHECK-NEXT: ubfx r3, r2, #12, #1 ; CHECK-NEXT: ubfx r2, r2, #8, #1 ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmov.u8 r0, q0[14] +; CHECK-NEXT: vmov.s8 r0, q0[14] ; CHECK-NEXT: vmov q2[2], q2[0], r2, r3 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: smull r0, r4, r0, r4 ; CHECK-NEXT: vmov q2[3], q2[1], r2, r3 -; CHECK-NEXT: vmov.u8 r2, q1[15] -; CHECK-NEXT: vmov.u8 r3, q0[15] -; CHECK-NEXT: sxtb r0, r0 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r2, q1[15] +; CHECK-NEXT: vmov.s8 r3, q0[15] ; CHECK-NEXT: smull r2, r3, r3, r2 -; CHECK-NEXT: smull r0, r4, r0, r4 ; CHECK-NEXT: vmov q0[2], q0[0], r0, r2 ; CHECK-NEXT: vmov q0[3], q0[1], r4, r3 ; CHECK-NEXT: vand q0, q0, q2 @@ -2637,11 +2605,10 @@ ; CHECK-NEXT: vcmp.i8 eq, q2, zr ; CHECK-NEXT: vmov.i8 q2, #0x0 ; CHECK-NEXT: vmov.i8 q3, #0xff -; CHECK-NEXT: vmov.u8 r4, q0[0] +; CHECK-NEXT: vmov.s8 r4, q0[0] ; CHECK-NEXT: vpsel q4, q3, q2 -; CHECK-NEXT: sxtb r4, r4 +; CHECK-NEXT: vmov.s8 r5, q0[2] ; CHECK-NEXT: vmov.u8 r2, q4[0] -; CHECK-NEXT: vmov.u8 r5, q0[2] ; CHECK-NEXT: vmov.16 q5[0], r2 ; CHECK-NEXT: vmov.u8 r2, q4[1] ; CHECK-NEXT: vmov.16 q5[1], r2 @@ -2657,7 +2624,6 @@ ; CHECK-NEXT: vmov.16 q5[6], r2 ; CHECK-NEXT: vmov.u8 r2, q4[7] ; CHECK-NEXT: vmov.16 q5[7], r2 -; CHECK-NEXT: sxtb r5, r5 ; CHECK-NEXT: vcmp.i16 ne, q5, zr ; CHECK-NEXT: vpsel q5, q3, q2 ; CHECK-NEXT: vmov.u16 r2, q5[2] @@ -2674,13 +2640,10 @@ ; CHECK-NEXT: rsbs r3, r3, #0 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r3 ; CHECK-NEXT: vmov q6[3], q6[1], r2, r3 -; CHECK-NEXT: vmov.u8 r2, q1[1] -; CHECK-NEXT: vmov.u8 r3, q0[1] -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r2, q1[1] +; CHECK-NEXT: vmov.s8 r3, q0[1] ; CHECK-NEXT: smull r2, lr, r3, r2 -; CHECK-NEXT: vmov.u8 r3, q1[0] -; CHECK-NEXT: sxtb r3, r3 +; CHECK-NEXT: vmov.s8 r3, q1[0] ; CHECK-NEXT: smull r3, r4, r4, r3 ; CHECK-NEXT: vmov q7[2], q7[0], r3, r2 ; CHECK-NEXT: vmov q7[3], q7[1], r4, lr @@ -2696,223 +2659,196 @@ ; CHECK-NEXT: rsb.w r2, r2, #0 ; CHECK-NEXT: adc.w lr, lr, r3 ; CHECK-NEXT: vmov q6[2], q6[0], r2, r4 -; CHECK-NEXT: vmov.u8 r3, q1[2] +; CHECK-NEXT: vmov.s8 r3, q1[2] ; CHECK-NEXT: vmov q6[3], q6[1], r2, r4 -; CHECK-NEXT: vmov.u8 r2, q1[3] -; CHECK-NEXT: vmov.u8 r4, q0[3] -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: smull r2, r4, r4, r2 +; CHECK-NEXT: vmov.s8 r2, q1[3] +; CHECK-NEXT: vmov.s8 r4, q0[3] ; CHECK-NEXT: smull r3, r5, r5, r3 +; CHECK-NEXT: smull r2, r4, r4, r2 ; CHECK-NEXT: vmov q7[2], q7[0], r3, r2 ; CHECK-NEXT: vmov q7[3], q7[1], r5, r4 -; CHECK-NEXT: vmov.u8 r4, q1[4] ; CHECK-NEXT: vand q6, q7, q6 -; CHECK-NEXT: sxtb r4, r4 ; CHECK-NEXT: vmov r3, s24 ; CHECK-NEXT: vmov r2, s25 -; CHECK-NEXT: vmov r5, s26 +; CHECK-NEXT: vmov r5, s27 ; CHECK-NEXT: adds r3, r3, r6 -; CHECK-NEXT: vmov r6, s27 -; CHECK-NEXT: adc.w r2, r2, lr -; CHECK-NEXT: adds.w r12, r3, r5 -; CHECK-NEXT: vmov.u8 r3, q0[4] -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: smull r3, r4, r3, r4 -; CHECK-NEXT: adc.w lr, r2, r6 -; CHECK-NEXT: vmov.u16 r2, q5[6] -; CHECK-NEXT: vmov.u16 r6, q5[4] -; CHECK-NEXT: vmov q6[2], q6[0], r6, r2 -; CHECK-NEXT: vmov.u16 r2, q5[7] -; CHECK-NEXT: vmov.u16 r6, q5[5] -; CHECK-NEXT: vmov q6[3], q6[1], r6, r2 +; CHECK-NEXT: adc.w r6, lr, r2 +; CHECK-NEXT: vmov r2, s26 +; CHECK-NEXT: adds.w r12, r3, r2 +; CHECK-NEXT: vmov.s8 r2, q1[4] +; CHECK-NEXT: adc.w lr, r6, r5 +; CHECK-NEXT: vmov.u16 r6, q5[6] +; CHECK-NEXT: vmov.u16 r5, q5[4] +; CHECK-NEXT: vmov.s8 r3, q0[4] +; CHECK-NEXT: vmov q6[2], q6[0], r5, r6 +; CHECK-NEXT: vmov.u16 r6, q5[7] +; CHECK-NEXT: vmov.u16 r5, q5[5] +; CHECK-NEXT: smull r2, r3, r3, r2 +; CHECK-NEXT: vmov q6[3], q6[1], r5, r6 ; CHECK-NEXT: vcmp.i32 ne, q6, zr -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: and r5, r2, #1 -; CHECK-NEXT: ubfx r6, r2, #4, #1 +; CHECK-NEXT: vmrs r6, p0 +; CHECK-NEXT: and r4, r6, #1 +; CHECK-NEXT: ubfx r5, r6, #4, #1 +; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: rsbs r5, r5, #0 -; CHECK-NEXT: rsbs r6, r6, #0 -; CHECK-NEXT: vmov q5[2], q5[0], r5, r6 -; CHECK-NEXT: vmov q5[3], q5[1], r5, r6 -; CHECK-NEXT: vmov.u8 r6, q1[5] -; CHECK-NEXT: vmov.u8 r5, q0[5] -; CHECK-NEXT: sxtb r6, r6 -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: smull r6, r5, r5, r6 -; CHECK-NEXT: vmov q6[2], q6[0], r3, r6 -; CHECK-NEXT: vmov q6[3], q6[1], r4, r5 +; CHECK-NEXT: vmov q5[2], q5[0], r4, r5 +; CHECK-NEXT: vmov q5[3], q5[1], r4, r5 +; CHECK-NEXT: vmov.s8 r5, q1[5] +; CHECK-NEXT: vmov.s8 r4, q0[5] +; CHECK-NEXT: smull r5, r4, r4, r5 +; CHECK-NEXT: vmov q6[2], q6[0], r2, r5 +; CHECK-NEXT: vmov q6[3], q6[1], r3, r4 ; CHECK-NEXT: vand q5, q6, q5 -; CHECK-NEXT: vmov r4, s20 -; CHECK-NEXT: vmov r3, s21 -; CHECK-NEXT: vmov r5, s23 -; CHECK-NEXT: adds.w r6, r12, r4 +; CHECK-NEXT: vmov r3, s20 +; CHECK-NEXT: vmov r2, s21 ; CHECK-NEXT: vmov r4, s22 -; CHECK-NEXT: adc.w r3, r3, lr -; CHECK-NEXT: adds r6, r6, r4 -; CHECK-NEXT: vmov.u8 r4, q1[6] -; CHECK-NEXT: adc.w r12, r3, r5 -; CHECK-NEXT: ubfx r5, r2, #12, #1 -; CHECK-NEXT: ubfx r2, r2, #8, #1 +; CHECK-NEXT: vmov r5, s23 +; CHECK-NEXT: adds.w r3, r3, r12 +; CHECK-NEXT: adc.w r2, r2, lr +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: vmov.s8 r4, q1[6] +; CHECK-NEXT: adc.w r12, r2, r5 +; CHECK-NEXT: ubfx r5, r6, #12, #1 +; CHECK-NEXT: ubfx r6, r6, #8, #1 ; CHECK-NEXT: rsbs r5, r5, #0 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmov.u8 r3, q0[6] -; CHECK-NEXT: vmov q5[2], q5[0], r2, r5 -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: vmov q5[3], q5[1], r2, r5 -; CHECK-NEXT: vmov.u8 r2, q1[7] -; CHECK-NEXT: vmov.u8 r5, q0[7] -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: smull r2, r5, r5, r2 -; CHECK-NEXT: smull r3, r4, r3, r4 -; CHECK-NEXT: vmov q6[2], q6[0], r3, r2 +; CHECK-NEXT: rsbs r6, r6, #0 +; CHECK-NEXT: vmov.s8 r2, q0[6] +; CHECK-NEXT: vmov q5[2], q5[0], r6, r5 +; CHECK-NEXT: smull r2, r4, r2, r4 +; CHECK-NEXT: vmov q5[3], q5[1], r6, r5 +; CHECK-NEXT: vmov.s8 r6, q1[7] +; CHECK-NEXT: vmov.s8 r5, q0[7] +; CHECK-NEXT: smull r6, r5, r5, r6 +; CHECK-NEXT: vmov q6[2], q6[0], r2, r6 ; CHECK-NEXT: vmov q6[3], q6[1], r4, r5 -; CHECK-NEXT: vmov.u8 r4, q1[8] ; CHECK-NEXT: vand q5, q6, q5 -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: vmov r3, s20 +; CHECK-NEXT: vmov r6, s20 ; CHECK-NEXT: vmov r2, s21 -; CHECK-NEXT: vmov r5, s22 +; CHECK-NEXT: vmov r5, s23 ; CHECK-NEXT: adds r3, r3, r6 -; CHECK-NEXT: vmov r6, s23 -; CHECK-NEXT: adc.w r2, r2, r12 -; CHECK-NEXT: adds.w r12, r3, r5 -; CHECK-NEXT: vmov.u8 r3, q0[8] -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: smull r3, r4, r3, r4 -; CHECK-NEXT: adc.w lr, r2, r6 -; CHECK-NEXT: vmov.u8 r2, q4[8] -; CHECK-NEXT: vmov.16 q5[0], r2 -; CHECK-NEXT: vmov.u8 r2, q4[9] -; CHECK-NEXT: vmov.16 q5[1], r2 -; CHECK-NEXT: vmov.u8 r2, q4[10] -; CHECK-NEXT: vmov.16 q5[2], r2 -; CHECK-NEXT: vmov.u8 r2, q4[11] -; CHECK-NEXT: vmov.16 q5[3], r2 -; CHECK-NEXT: vmov.u8 r2, q4[12] -; CHECK-NEXT: vmov.16 q5[4], r2 -; CHECK-NEXT: vmov.u8 r2, q4[13] -; CHECK-NEXT: vmov.16 q5[5], r2 -; CHECK-NEXT: vmov.u8 r2, q4[14] -; CHECK-NEXT: vmov.16 q5[6], r2 -; CHECK-NEXT: vmov.u8 r2, q4[15] -; CHECK-NEXT: vmov.16 q5[7], r2 +; CHECK-NEXT: adc.w r6, r12, r2 +; CHECK-NEXT: vmov r2, s22 +; CHECK-NEXT: adds.w r12, r3, r2 +; CHECK-NEXT: vmov.s8 r2, q1[8] +; CHECK-NEXT: adc.w lr, r6, r5 +; CHECK-NEXT: vmov.u8 r6, q4[8] +; CHECK-NEXT: vmov.16 q5[0], r6 +; CHECK-NEXT: vmov.u8 r6, q4[9] +; CHECK-NEXT: vmov.16 q5[1], r6 +; CHECK-NEXT: vmov.u8 r6, q4[10] +; CHECK-NEXT: vmov.16 q5[2], r6 +; CHECK-NEXT: vmov.u8 r6, q4[11] +; CHECK-NEXT: vmov.16 q5[3], r6 +; CHECK-NEXT: vmov.u8 r6, q4[12] +; CHECK-NEXT: vmov.16 q5[4], r6 +; CHECK-NEXT: vmov.u8 r6, q4[13] +; CHECK-NEXT: vmov.16 q5[5], r6 +; CHECK-NEXT: vmov.u8 r6, q4[14] +; CHECK-NEXT: vmov.16 q5[6], r6 +; CHECK-NEXT: vmov.u8 r6, q4[15] +; CHECK-NEXT: vmov.16 q5[7], r6 +; CHECK-NEXT: vmov.s8 r3, q0[8] ; CHECK-NEXT: vcmp.i16 ne, q5, zr +; CHECK-NEXT: smull r2, r3, r3, r2 ; CHECK-NEXT: vpsel q2, q3, q2 -; CHECK-NEXT: vmov.u16 r2, q2[2] -; CHECK-NEXT: vmov.u16 r6, q2[0] -; CHECK-NEXT: vmov q3[2], q3[0], r6, r2 -; CHECK-NEXT: vmov.u16 r2, q2[3] -; CHECK-NEXT: vmov.u16 r6, q2[1] -; CHECK-NEXT: vmov q3[3], q3[1], r6, r2 -; CHECK-NEXT: vcmp.i32 ne, q3, zr -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: and r5, r2, #1 -; CHECK-NEXT: ubfx r6, r2, #4, #1 -; CHECK-NEXT: rsbs r5, r5, #0 -; CHECK-NEXT: rsbs r6, r6, #0 +; CHECK-NEXT: vmov.u16 r6, q2[2] +; CHECK-NEXT: vmov.u16 r5, q2[0] ; CHECK-NEXT: vmov q3[2], q3[0], r5, r6 +; CHECK-NEXT: vmov.u16 r6, q2[3] +; CHECK-NEXT: vmov.u16 r5, q2[1] ; CHECK-NEXT: vmov q3[3], q3[1], r5, r6 -; CHECK-NEXT: vmov.u8 r6, q1[9] -; CHECK-NEXT: vmov.u8 r5, q0[9] -; CHECK-NEXT: sxtb r6, r6 -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: smull r6, r5, r5, r6 -; CHECK-NEXT: vmov q4[2], q4[0], r3, r6 -; CHECK-NEXT: vmov q4[3], q4[1], r4, r5 +; CHECK-NEXT: vcmp.i32 ne, q3, zr +; CHECK-NEXT: vmrs r6, p0 +; CHECK-NEXT: and r4, r6, #1 +; CHECK-NEXT: ubfx r5, r6, #4, #1 +; CHECK-NEXT: rsbs r4, r4, #0 +; CHECK-NEXT: rsbs r5, r5, #0 +; CHECK-NEXT: vmov q3[2], q3[0], r4, r5 +; CHECK-NEXT: vmov q3[3], q3[1], r4, r5 +; CHECK-NEXT: vmov.s8 r5, q1[9] +; CHECK-NEXT: vmov.s8 r4, q0[9] +; CHECK-NEXT: smull r5, r4, r4, r5 +; CHECK-NEXT: vmov q4[2], q4[0], r2, r5 +; CHECK-NEXT: vmov q4[3], q4[1], r3, r4 ; CHECK-NEXT: vand q3, q4, q3 -; CHECK-NEXT: vmov r4, s12 -; CHECK-NEXT: vmov r3, s13 -; CHECK-NEXT: vmov r5, s15 -; CHECK-NEXT: adds.w r6, r12, r4 +; CHECK-NEXT: vmov r3, s12 +; CHECK-NEXT: vmov r2, s13 ; CHECK-NEXT: vmov r4, s14 -; CHECK-NEXT: adc.w r3, r3, lr -; CHECK-NEXT: adds r6, r6, r4 -; CHECK-NEXT: vmov.u8 r4, q1[10] -; CHECK-NEXT: adc.w r12, r3, r5 -; CHECK-NEXT: ubfx r5, r2, #12, #1 -; CHECK-NEXT: ubfx r2, r2, #8, #1 +; CHECK-NEXT: vmov r5, s15 +; CHECK-NEXT: adds.w r3, r3, r12 +; CHECK-NEXT: adc.w r2, r2, lr +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: vmov.s8 r4, q1[10] +; CHECK-NEXT: adc.w r12, r2, r5 +; CHECK-NEXT: ubfx r5, r6, #12, #1 +; CHECK-NEXT: ubfx r6, r6, #8, #1 ; CHECK-NEXT: rsbs r5, r5, #0 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmov.u8 r3, q0[10] -; CHECK-NEXT: vmov q3[2], q3[0], r2, r5 -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: vmov q3[3], q3[1], r2, r5 -; CHECK-NEXT: vmov.u8 r2, q1[11] -; CHECK-NEXT: vmov.u8 r5, q0[11] -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: smull r2, r5, r5, r2 -; CHECK-NEXT: smull r3, r4, r3, r4 -; CHECK-NEXT: vmov q4[2], q4[0], r3, r2 +; CHECK-NEXT: rsbs r6, r6, #0 +; CHECK-NEXT: vmov.s8 r2, q0[10] +; CHECK-NEXT: vmov q3[2], q3[0], r6, r5 +; CHECK-NEXT: smull r2, r4, r2, r4 +; CHECK-NEXT: vmov q3[3], q3[1], r6, r5 +; CHECK-NEXT: vmov.s8 r6, q1[11] +; CHECK-NEXT: vmov.s8 r5, q0[11] +; CHECK-NEXT: smull r6, r5, r5, r6 +; CHECK-NEXT: vmov q4[2], q4[0], r2, r6 ; CHECK-NEXT: vmov q4[3], q4[1], r4, r5 -; CHECK-NEXT: vmov.u8 r4, q1[12] ; CHECK-NEXT: vand q3, q4, q3 -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: vmov r3, s12 +; CHECK-NEXT: vmov r6, s12 ; CHECK-NEXT: vmov r2, s13 -; CHECK-NEXT: vmov r5, s14 +; CHECK-NEXT: vmov r5, s15 ; CHECK-NEXT: adds r3, r3, r6 -; CHECK-NEXT: vmov r6, s15 -; CHECK-NEXT: adc.w r2, r2, r12 -; CHECK-NEXT: adds.w r12, r3, r5 -; CHECK-NEXT: vmov.u8 r3, q0[12] -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: smull r3, r4, r3, r4 -; CHECK-NEXT: adc.w lr, r2, r6 -; CHECK-NEXT: vmov.u16 r2, q2[6] -; CHECK-NEXT: vmov.u16 r6, q2[4] -; CHECK-NEXT: vmov q3[2], q3[0], r6, r2 -; CHECK-NEXT: vmov.u16 r2, q2[7] -; CHECK-NEXT: vmov.u16 r6, q2[5] -; CHECK-NEXT: vmov q3[3], q3[1], r6, r2 +; CHECK-NEXT: adc.w r6, r12, r2 +; CHECK-NEXT: vmov r2, s14 +; CHECK-NEXT: adds.w r12, r3, r2 +; CHECK-NEXT: vmov.s8 r2, q1[12] +; CHECK-NEXT: adc.w lr, r6, r5 +; CHECK-NEXT: vmov.u16 r6, q2[6] +; CHECK-NEXT: vmov.u16 r5, q2[4] +; CHECK-NEXT: vmov.s8 r3, q0[12] +; CHECK-NEXT: vmov q3[2], q3[0], r5, r6 +; CHECK-NEXT: vmov.u16 r6, q2[7] +; CHECK-NEXT: vmov.u16 r5, q2[5] +; CHECK-NEXT: smull r2, r3, r3, r2 +; CHECK-NEXT: vmov q3[3], q3[1], r5, r6 ; CHECK-NEXT: vcmp.i32 ne, q3, zr -; CHECK-NEXT: vmrs r2, p0 -; CHECK-NEXT: and r5, r2, #1 -; CHECK-NEXT: ubfx r6, r2, #4, #1 +; CHECK-NEXT: vmrs r6, p0 +; CHECK-NEXT: and r4, r6, #1 +; CHECK-NEXT: ubfx r5, r6, #4, #1 +; CHECK-NEXT: rsbs r4, r4, #0 ; CHECK-NEXT: rsbs r5, r5, #0 -; CHECK-NEXT: rsbs r6, r6, #0 -; CHECK-NEXT: vmov q2[2], q2[0], r5, r6 -; CHECK-NEXT: vmov q2[3], q2[1], r5, r6 -; CHECK-NEXT: vmov.u8 r6, q1[13] -; CHECK-NEXT: vmov.u8 r5, q0[13] -; CHECK-NEXT: sxtb r6, r6 -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: smull r6, r5, r5, r6 -; CHECK-NEXT: vmov q3[2], q3[0], r3, r6 -; CHECK-NEXT: vmov q3[3], q3[1], r4, r5 +; CHECK-NEXT: vmov q2[2], q2[0], r4, r5 +; CHECK-NEXT: vmov q2[3], q2[1], r4, r5 +; CHECK-NEXT: vmov.s8 r5, q1[13] +; CHECK-NEXT: vmov.s8 r4, q0[13] +; CHECK-NEXT: smull r5, r4, r4, r5 +; CHECK-NEXT: vmov q3[2], q3[0], r2, r5 +; CHECK-NEXT: vmov q3[3], q3[1], r3, r4 ; CHECK-NEXT: vand q2, q3, q2 -; CHECK-NEXT: vmov r4, s8 -; CHECK-NEXT: vmov r3, s9 -; CHECK-NEXT: vmov r5, s11 -; CHECK-NEXT: adds.w r6, r12, r4 +; CHECK-NEXT: vmov r3, s8 +; CHECK-NEXT: vmov r2, s9 ; CHECK-NEXT: vmov r4, s10 -; CHECK-NEXT: adc.w r3, r3, lr -; CHECK-NEXT: adds r6, r6, r4 -; CHECK-NEXT: vmov.u8 r4, q1[14] -; CHECK-NEXT: adc.w r12, r3, r5 -; CHECK-NEXT: ubfx r5, r2, #12, #1 -; CHECK-NEXT: ubfx r2, r2, #8, #1 +; CHECK-NEXT: vmov r5, s11 +; CHECK-NEXT: adds.w r3, r3, r12 +; CHECK-NEXT: adc.w r2, r2, lr +; CHECK-NEXT: adds r3, r3, r4 +; CHECK-NEXT: vmov.s8 r4, q1[14] +; CHECK-NEXT: adc.w r12, r2, r5 +; CHECK-NEXT: ubfx r5, r6, #12, #1 +; CHECK-NEXT: ubfx r6, r6, #8, #1 ; CHECK-NEXT: rsbs r5, r5, #0 -; CHECK-NEXT: rsbs r2, r2, #0 -; CHECK-NEXT: vmov.u8 r3, q0[14] -; CHECK-NEXT: vmov q2[2], q2[0], r2, r5 -; CHECK-NEXT: sxtb r4, r4 -; CHECK-NEXT: vmov q2[3], q2[1], r2, r5 -; CHECK-NEXT: vmov.u8 r2, q1[15] -; CHECK-NEXT: vmov.u8 r5, q0[15] -; CHECK-NEXT: sxtb r3, r3 -; CHECK-NEXT: sxtb r2, r2 -; CHECK-NEXT: sxtb r5, r5 -; CHECK-NEXT: smull r2, r5, r5, r2 -; CHECK-NEXT: smull r3, r4, r3, r4 -; CHECK-NEXT: vmov q0[2], q0[0], r3, r2 +; CHECK-NEXT: rsbs r6, r6, #0 +; CHECK-NEXT: vmov.s8 r2, q0[14] +; CHECK-NEXT: vmov q2[2], q2[0], r6, r5 +; CHECK-NEXT: smull r2, r4, r2, r4 +; CHECK-NEXT: vmov q2[3], q2[1], r6, r5 +; CHECK-NEXT: vmov.s8 r6, q1[15] +; CHECK-NEXT: vmov.s8 r5, q0[15] +; CHECK-NEXT: smull r6, r5, r5, r6 +; CHECK-NEXT: vmov q0[2], q0[0], r2, r6 ; CHECK-NEXT: vmov q0[3], q0[1], r4, r5 ; CHECK-NEXT: vand q0, q0, q2 -; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov r6, s0 ; CHECK-NEXT: vmov r2, s1 ; CHECK-NEXT: vmov r5, s2 ; CHECK-NEXT: adds r3, r3, r6