Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -284,6 +284,8 @@ setOperationAction(ISD::HADDU, VT, Legal); setOperationAction(ISD::RHADDS, VT, Legal); setOperationAction(ISD::RHADDU, VT, Legal); + setOperationAction(ISD::ABDS, VT, Legal); + setOperationAction(ISD::ABDU, VT, Legal); // No native support for these. setOperationAction(ISD::UDIV, VT, Expand); Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2099,36 +2099,31 @@ let validForTailPredication = 1; } -multiclass MVE_VABD_m { +multiclass MVE_VABD_m { def "" : MVE_VABD_int; defvar Inst = !cast(NAME); let Predicates = [HasMVEInt] in { + defm : MVE_TwoOpPattern(NAME)>; + // Unpredicated absolute difference def : Pat<(VTI.Vec (unpred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), (i32 VTI.Unsigned))), (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn)))>; - - // Predicated absolute difference - def : Pat<(VTI.Vec (pred_int (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), - (i32 VTI.Unsigned), (VTI.Pred VCCR:$mask), - (VTI.Vec MQPR:$inactive))), - (VTI.Vec (Inst (VTI.Vec MQPR:$Qm), (VTI.Vec MQPR:$Qn), - ARMVCCThen, (VTI.Pred VCCR:$mask), - (VTI.Vec MQPR:$inactive)))>; } } -multiclass MVE_VABD - : MVE_VABD_m; +multiclass MVE_VABD + : MVE_VABD_m; -defm MVE_VABDs8 : MVE_VABD; -defm MVE_VABDs16 : MVE_VABD; -defm MVE_VABDs32 : MVE_VABD; -defm MVE_VABDu8 : MVE_VABD; -defm MVE_VABDu16 : MVE_VABD; -defm MVE_VABDu32 : MVE_VABD; +defm MVE_VABDs8 : MVE_VABD; +defm MVE_VABDs16 : MVE_VABD; +defm MVE_VABDs32 : MVE_VABD; +defm MVE_VABDu8 : MVE_VABD; +defm MVE_VABDu16 : MVE_VABD; +defm MVE_VABDu32 : MVE_VABD; class MVE_VRHADD_Base size, list pattern=[]> : MVE_int<"vrhadd", suffix, size, pattern> { Index: llvm/test/CodeGen/Thumb2/mve-vabdus.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vabdus.ll +++ llvm/test/CodeGen/Thumb2/mve-vabdus.ll @@ -4,111 +4,7 @@ define arm_aapcs_vfpcc <16 x i8> @vabd_s8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: vabd_s8: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.u8 r0, q1[0] -; CHECK-NEXT: vmov.16 q2[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[1] -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov.u8 r0, q1[2] -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[3] -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[4] -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[5] -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[6] -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[7] -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vmov.u8 r0, q0[0] -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[1] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[2] -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[3] -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[4] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u8 r0, q0[5] -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u8 r0, q0[6] -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[7] -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vmovlb.s8 q2, q2 -; CHECK-NEXT: vmovlb.s8 q3, q3 -; CHECK-NEXT: vsub.i16 q2, q3, q2 -; CHECK-NEXT: vabs.s16 q3, q2 -; CHECK-NEXT: vmov.u16 r0, q3[0] -; CHECK-NEXT: vmov.8 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q3[1] -; CHECK-NEXT: vmov.8 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q3[2] -; CHECK-NEXT: vmov.8 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q3[3] -; CHECK-NEXT: vmov.8 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q3[4] -; CHECK-NEXT: vmov.8 q2[4], r0 -; CHECK-NEXT: vmov.u16 r0, q3[5] -; CHECK-NEXT: vmov.8 q2[5], r0 -; CHECK-NEXT: vmov.u16 r0, q3[6] -; CHECK-NEXT: vmov.8 q2[6], r0 -; CHECK-NEXT: vmov.u16 r0, q3[7] -; CHECK-NEXT: vmov.8 q2[7], r0 -; CHECK-NEXT: vmov.u8 r0, q1[8] -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[9] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q1[10] -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[11] -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[12] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[13] -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[14] -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[15] -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vmov.u8 r0, q0[8] -; CHECK-NEXT: vmovlb.s8 q1, q3 -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[9] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[10] -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[11] -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[12] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u8 r0, q0[13] -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u8 r0, q0[14] -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[15] -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vmovlb.s8 q0, q3 -; CHECK-NEXT: vsub.i16 q0, q0, q1 -; CHECK-NEXT: vabs.s16 q0, q0 -; CHECK-NEXT: vmov.u16 r0, q0[0] -; CHECK-NEXT: vmov.8 q2[8], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.8 q2[9], r0 -; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.8 q2[10], r0 -; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.8 q2[11], r0 -; CHECK-NEXT: vmov.u16 r0, q0[4] -; CHECK-NEXT: vmov.8 q2[12], r0 -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.8 q2[13], r0 -; CHECK-NEXT: vmov.u16 r0, q0[6] -; CHECK-NEXT: vmov.8 q2[14], r0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.8 q2[15], r0 -; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vabd.s8 q0, q0, q1 ; CHECK-NEXT: bx lr %sextsrc1 = sext <16 x i8> %src1 to <16 x i16> %sextsrc2 = sext <16 x i8> %src2 to <16 x i16> @@ -123,63 +19,7 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_s16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: vabd_s16: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q0[0] -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmovlb.s16 q2, q2 -; CHECK-NEXT: vmovlb.s16 q3, q3 -; CHECK-NEXT: vsub.i32 q2, q3, q2 -; CHECK-NEXT: vabs.s32 q3, q2 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmov.16 q2[0], r0 -; CHECK-NEXT: vmov r0, s13 -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov r0, s14 -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov r0, s15 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[4] -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[7] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmov.u16 r0, q0[4] -; CHECK-NEXT: vmovlb.s16 q1, q3 -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[6] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmovlb.s16 q0, q3 -; CHECK-NEXT: vsub.i32 q0, q0, q1 -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vabd.s16 q0, q0, q1 ; CHECK-NEXT: bx lr %sextsrc1 = sext <8 x i16> %src1 to <8 x i32> %sextsrc2 = sext <8 x i16> %src2 to <8 x i32> @@ -194,47 +34,7 @@ define arm_aapcs_vfpcc <4 x i32> @vabd_s32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: vabd_s32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.f32 s8, s0 -; CHECK-NEXT: vmov.f32 s12, s4 -; CHECK-NEXT: vmov.f32 s10, s1 -; CHECK-NEXT: vmov.f32 s14, s5 -; CHECK-NEXT: vmov r3, s12 -; CHECK-NEXT: vmov r0, s10 -; CHECK-NEXT: vmov r2, s14 -; CHECK-NEXT: vmov.f32 s12, s2 -; CHECK-NEXT: vmov.f32 s14, s3 -; CHECK-NEXT: vmov.f32 s0, s6 -; CHECK-NEXT: vmov.f32 s2, s7 -; CHECK-NEXT: asrs r1, r0, #31 -; CHECK-NEXT: subs r0, r0, r2 -; CHECK-NEXT: sbc.w r1, r1, r2, asr #31 -; CHECK-NEXT: add.w r0, r0, r1, asr #31 -; CHECK-NEXT: eor.w r0, r0, r1, asr #31 -; CHECK-NEXT: vmov r1, s8 -; CHECK-NEXT: asrs r2, r1, #31 -; CHECK-NEXT: subs r1, r1, r3 -; CHECK-NEXT: sbc.w r2, r2, r3, asr #31 -; CHECK-NEXT: add.w r1, r1, r2, asr #31 -; CHECK-NEXT: eor.w r1, r1, r2, asr #31 -; CHECK-NEXT: vmov r2, s0 -; CHECK-NEXT: vmov.32 q2[0], r1 -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: asrs r1, r0, #31 -; CHECK-NEXT: subs r0, r0, r2 -; CHECK-NEXT: sbc.w r1, r1, r2, asr #31 -; CHECK-NEXT: vmov r2, s2 -; CHECK-NEXT: add.w r0, r0, r1, asr #31 -; CHECK-NEXT: eor.w r0, r0, r1, asr #31 -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov r0, s14 -; CHECK-NEXT: asrs r1, r0, #31 -; CHECK-NEXT: subs r0, r0, r2 -; CHECK-NEXT: sbc.w r1, r1, r2, asr #31 -; CHECK-NEXT: add.w r0, r0, r1, asr #31 -; CHECK-NEXT: eor.w r0, r0, r1, asr #31 -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vabd.s32 q0, q0, q1 ; CHECK-NEXT: bx lr %sextsrc1 = sext <4 x i32> %src1 to <4 x i64> %sextsrc2 = sext <4 x i32> %src2 to <4 x i64> @@ -249,111 +49,7 @@ define arm_aapcs_vfpcc <16 x i8> @vabd_u8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: vabd_u8: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.u8 r0, q1[0] -; CHECK-NEXT: vmov.16 q2[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[1] -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov.u8 r0, q1[2] -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[3] -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[4] -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[5] -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[6] -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[7] -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vmov.u8 r0, q0[0] -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[1] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[2] -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[3] -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[4] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u8 r0, q0[5] -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u8 r0, q0[6] -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[7] -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vmovlb.u8 q2, q2 -; CHECK-NEXT: vmovlb.u8 q3, q3 -; CHECK-NEXT: vsub.i16 q2, q3, q2 -; CHECK-NEXT: vabs.s16 q3, q2 -; CHECK-NEXT: vmov.u16 r0, q3[0] -; CHECK-NEXT: vmov.8 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q3[1] -; CHECK-NEXT: vmov.8 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q3[2] -; CHECK-NEXT: vmov.8 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q3[3] -; CHECK-NEXT: vmov.8 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q3[4] -; CHECK-NEXT: vmov.8 q2[4], r0 -; CHECK-NEXT: vmov.u16 r0, q3[5] -; CHECK-NEXT: vmov.8 q2[5], r0 -; CHECK-NEXT: vmov.u16 r0, q3[6] -; CHECK-NEXT: vmov.8 q2[6], r0 -; CHECK-NEXT: vmov.u16 r0, q3[7] -; CHECK-NEXT: vmov.8 q2[7], r0 -; CHECK-NEXT: vmov.u8 r0, q1[8] -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q1[9] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q1[10] -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q1[11] -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q1[12] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u8 r0, q1[13] -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u8 r0, q1[14] -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u8 r0, q1[15] -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vmov.u8 r0, q0[8] -; CHECK-NEXT: vmovlb.u8 q1, q3 -; CHECK-NEXT: vmov.16 q3[0], r0 -; CHECK-NEXT: vmov.u8 r0, q0[9] -; CHECK-NEXT: vmov.16 q3[1], r0 -; CHECK-NEXT: vmov.u8 r0, q0[10] -; CHECK-NEXT: vmov.16 q3[2], r0 -; CHECK-NEXT: vmov.u8 r0, q0[11] -; CHECK-NEXT: vmov.16 q3[3], r0 -; CHECK-NEXT: vmov.u8 r0, q0[12] -; CHECK-NEXT: vmov.16 q3[4], r0 -; CHECK-NEXT: vmov.u8 r0, q0[13] -; CHECK-NEXT: vmov.16 q3[5], r0 -; CHECK-NEXT: vmov.u8 r0, q0[14] -; CHECK-NEXT: vmov.16 q3[6], r0 -; CHECK-NEXT: vmov.u8 r0, q0[15] -; CHECK-NEXT: vmov.16 q3[7], r0 -; CHECK-NEXT: vmovlb.u8 q0, q3 -; CHECK-NEXT: vsub.i16 q0, q0, q1 -; CHECK-NEXT: vabs.s16 q0, q0 -; CHECK-NEXT: vmov.u16 r0, q0[0] -; CHECK-NEXT: vmov.8 q2[8], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.8 q2[9], r0 -; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.8 q2[10], r0 -; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.8 q2[11], r0 -; CHECK-NEXT: vmov.u16 r0, q0[4] -; CHECK-NEXT: vmov.8 q2[12], r0 -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.8 q2[13], r0 -; CHECK-NEXT: vmov.u16 r0, q0[6] -; CHECK-NEXT: vmov.8 q2[14], r0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.8 q2[15], r0 -; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vabd.u8 q0, q0, q1 ; CHECK-NEXT: bx lr %zextsrc1 = zext <16 x i8> %src1 to <16 x i16> %zextsrc2 = zext <16 x i8> %src2 to <16 x i16> @@ -368,63 +64,7 @@ define arm_aapcs_vfpcc <8 x i16> @vabd_u16(<8 x i16> %src1, <8 x i16> %src2) { ; CHECK-LABEL: vabd_u16: ; CHECK: @ %bb.0: -; CHECK-NEXT: vmov.u16 r0, q1[0] -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[1] -; CHECK-NEXT: vmov.32 q2[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[2] -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[3] -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q0[0] -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[1] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[2] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[3] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmovlb.u16 q2, q2 -; CHECK-NEXT: vmovlb.u16 q3, q3 -; CHECK-NEXT: vsub.i32 q2, q3, q2 -; CHECK-NEXT: vabs.s32 q3, q2 -; CHECK-NEXT: vmov r0, s12 -; CHECK-NEXT: vmov.16 q2[0], r0 -; CHECK-NEXT: vmov r0, s13 -; CHECK-NEXT: vmov.16 q2[1], r0 -; CHECK-NEXT: vmov r0, s14 -; CHECK-NEXT: vmov.16 q2[2], r0 -; CHECK-NEXT: vmov r0, s15 -; CHECK-NEXT: vmov.16 q2[3], r0 -; CHECK-NEXT: vmov.u16 r0, q1[4] -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q1[5] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q1[6] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q1[7] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmov.u16 r0, q0[4] -; CHECK-NEXT: vmovlb.u16 q1, q3 -; CHECK-NEXT: vmov.32 q3[0], r0 -; CHECK-NEXT: vmov.u16 r0, q0[5] -; CHECK-NEXT: vmov.32 q3[1], r0 -; CHECK-NEXT: vmov.u16 r0, q0[6] -; CHECK-NEXT: vmov.32 q3[2], r0 -; CHECK-NEXT: vmov.u16 r0, q0[7] -; CHECK-NEXT: vmov.32 q3[3], r0 -; CHECK-NEXT: vmovlb.u16 q0, q3 -; CHECK-NEXT: vsub.i32 q0, q0, q1 -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vmov r0, s0 -; CHECK-NEXT: vmov.16 q2[4], r0 -; CHECK-NEXT: vmov r0, s1 -; CHECK-NEXT: vmov.16 q2[5], r0 -; CHECK-NEXT: vmov r0, s2 -; CHECK-NEXT: vmov.16 q2[6], r0 -; CHECK-NEXT: vmov r0, s3 -; CHECK-NEXT: vmov.16 q2[7], r0 -; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: vabd.u16 q0, q0, q1 ; CHECK-NEXT: bx lr %zextsrc1 = zext <8 x i16> %src1 to <8 x i32> %zextsrc2 = zext <8 x i16> %src2 to <8 x i32> @@ -439,59 +79,7 @@ define arm_aapcs_vfpcc <4 x i32> @vabd_u32(<4 x i32> %src1, <4 x i32> %src2) { ; CHECK-LABEL: vabd_u32: ; CHECK: @ %bb.0: -; CHECK-NEXT: .vsave {d8, d9} -; CHECK-NEXT: vpush {d8, d9} -; CHECK-NEXT: vmov.f32 s8, s4 -; CHECK-NEXT: vmov.i64 q3, #0xffffffff -; CHECK-NEXT: vmov.f32 s16, s0 -; CHECK-NEXT: vmov.f32 s10, s5 -; CHECK-NEXT: vmov.f32 s18, s1 -; CHECK-NEXT: vand q2, q2, q3 -; CHECK-NEXT: vand q4, q4, q3 -; CHECK-NEXT: vmov r2, s10 -; CHECK-NEXT: vmov r3, s18 -; CHECK-NEXT: vmov r0, s11 -; CHECK-NEXT: vmov r1, s19 -; CHECK-NEXT: subs r2, r3, r2 -; CHECK-NEXT: vmov r3, s8 -; CHECK-NEXT: sbc.w r0, r1, r0 -; CHECK-NEXT: add.w r1, r2, r0, asr #31 -; CHECK-NEXT: vmov r2, s17 -; CHECK-NEXT: eor.w r12, r1, r0, asr #31 -; CHECK-NEXT: vmov r0, s16 -; CHECK-NEXT: vmov r1, s9 -; CHECK-NEXT: vmov.f32 s16, s6 -; CHECK-NEXT: vmov.f32 s18, s7 -; CHECK-NEXT: vand q1, q4, q3 -; CHECK-NEXT: vmov.f32 s16, s2 -; CHECK-NEXT: vmov.f32 s18, s3 -; CHECK-NEXT: vand q0, q4, q3 -; CHECK-NEXT: subs r0, r0, r3 -; CHECK-NEXT: vmov r3, s0 -; CHECK-NEXT: sbc.w r1, r2, r1 -; CHECK-NEXT: vmov r2, s4 -; CHECK-NEXT: add.w r0, r0, r1, asr #31 -; CHECK-NEXT: eor.w r0, r0, r1, asr #31 -; CHECK-NEXT: vmov r1, s1 -; CHECK-NEXT: vmov.32 q2[0], r0 -; CHECK-NEXT: vmov r0, s5 -; CHECK-NEXT: vmov.32 q2[1], r12 -; CHECK-NEXT: subs r2, r3, r2 -; CHECK-NEXT: vmov r3, s2 -; CHECK-NEXT: sbc.w r0, r1, r0 -; CHECK-NEXT: add.w r1, r2, r0, asr #31 -; CHECK-NEXT: vmov r2, s6 -; CHECK-NEXT: eor.w r0, r1, r0, asr #31 -; CHECK-NEXT: vmov r1, s3 -; CHECK-NEXT: vmov.32 q2[2], r0 -; CHECK-NEXT: vmov r0, s7 -; CHECK-NEXT: subs r2, r3, r2 -; CHECK-NEXT: sbc.w r0, r1, r0 -; CHECK-NEXT: add.w r1, r2, r0, asr #31 -; CHECK-NEXT: eor.w r0, r1, r0, asr #31 -; CHECK-NEXT: vmov.32 q2[3], r0 -; CHECK-NEXT: vmov q0, q2 -; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: vabd.u32 q0, q0, q1 ; CHECK-NEXT: bx lr %zextsrc1 = zext <4 x i32> %src1 to <4 x i64> %zextsrc2 = zext <4 x i32> %src2 to <4 x i64> @@ -512,26 +100,10 @@ ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB6_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrb.s32 q0, [r1, #12] -; CHECK-NEXT: vldrb.s32 q1, [r0, #12] -; CHECK-NEXT: vsub.i32 q0, q1, q0 -; CHECK-NEXT: vldrb.s32 q1, [r0, #8] -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vstrb.32 q0, [r2, #12] -; CHECK-NEXT: vldrb.s32 q0, [r1, #8] -; CHECK-NEXT: vsub.i32 q0, q1, q0 -; CHECK-NEXT: vldrb.s32 q1, [r0, #4] -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vstrb.32 q0, [r2, #8] -; CHECK-NEXT: vldrb.s32 q0, [r1, #4] -; CHECK-NEXT: vsub.i32 q0, q1, q0 -; CHECK-NEXT: vldrb.s32 q1, [r0], #16 -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vstrb.32 q0, [r2, #4] -; CHECK-NEXT: vldrb.s32 q0, [r1], #16 -; CHECK-NEXT: vsub.i32 q0, q1, q0 -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vstrb.32 q0, [r2], #16 +; CHECK-NEXT: vldrb.u8 q0, [r1], #16 +; CHECK-NEXT: vldrb.u8 q1, [r0], #16 +; CHECK-NEXT: vabd.s8 q0, q1, q0 +; CHECK-NEXT: vstrb.8 q0, [r2], #16 ; CHECK-NEXT: le lr, .LBB6_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -573,16 +145,10 @@ ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB7_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.s32 q0, [r1, #8] -; CHECK-NEXT: vldrh.s32 q1, [r0, #8] -; CHECK-NEXT: vsub.i32 q0, q1, q0 -; CHECK-NEXT: vldrh.s32 q1, [r0], #16 -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vstrh.32 q0, [r2, #8] -; CHECK-NEXT: vldrh.s32 q0, [r1], #16 -; CHECK-NEXT: vsub.i32 q0, q1, q0 -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vstrh.32 q0, [r2], #16 +; CHECK-NEXT: vldrh.u16 q0, [r1], #16 +; CHECK-NEXT: vldrh.u16 q1, [r0], #16 +; CHECK-NEXT: vabd.s16 q0, q1, q0 +; CHECK-NEXT: vstrb.8 q0, [r2], #16 ; CHECK-NEXT: le lr, .LBB7_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -728,26 +294,10 @@ ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB9_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrb.u32 q0, [r1, #12] -; CHECK-NEXT: vldrb.u32 q1, [r0, #12] -; CHECK-NEXT: vsub.i32 q0, q1, q0 -; CHECK-NEXT: vldrb.u32 q1, [r0, #8] -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vstrb.32 q0, [r2, #12] -; CHECK-NEXT: vldrb.u32 q0, [r1, #8] -; CHECK-NEXT: vsub.i32 q0, q1, q0 -; CHECK-NEXT: vldrb.u32 q1, [r0, #4] -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vstrb.32 q0, [r2, #8] -; CHECK-NEXT: vldrb.u32 q0, [r1, #4] -; CHECK-NEXT: vsub.i32 q0, q1, q0 -; CHECK-NEXT: vldrb.u32 q1, [r0], #16 -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vstrb.32 q0, [r2, #4] -; CHECK-NEXT: vldrb.u32 q0, [r1], #16 -; CHECK-NEXT: vsub.i32 q0, q1, q0 -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vstrb.32 q0, [r2], #16 +; CHECK-NEXT: vldrb.u8 q0, [r1], #16 +; CHECK-NEXT: vldrb.u8 q1, [r0], #16 +; CHECK-NEXT: vabd.u8 q0, q1, q0 +; CHECK-NEXT: vstrb.8 q0, [r2], #16 ; CHECK-NEXT: le lr, .LBB9_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc} @@ -789,16 +339,10 @@ ; CHECK-NEXT: dls lr, lr ; CHECK-NEXT: .LBB10_1: @ %vector.body ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: vldrh.u32 q0, [r1, #8] -; CHECK-NEXT: vldrh.u32 q1, [r0, #8] -; CHECK-NEXT: vsub.i32 q0, q1, q0 -; CHECK-NEXT: vldrh.u32 q1, [r0], #16 -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vstrh.32 q0, [r2, #8] -; CHECK-NEXT: vldrh.u32 q0, [r1], #16 -; CHECK-NEXT: vsub.i32 q0, q1, q0 -; CHECK-NEXT: vabs.s32 q0, q0 -; CHECK-NEXT: vstrh.32 q0, [r2], #16 +; CHECK-NEXT: vldrh.u16 q0, [r1], #16 +; CHECK-NEXT: vldrh.u16 q1, [r0], #16 +; CHECK-NEXT: vabd.u16 q0, q1, q0 +; CHECK-NEXT: vstrb.8 q0, [r2], #16 ; CHECK-NEXT: le lr, .LBB10_1 ; CHECK-NEXT: @ %bb.2: @ %for.cond.cleanup ; CHECK-NEXT: pop {r7, pc}