Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===================================================================
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2029,6 +2029,31 @@
 defm MVE_VRHADDu16 : MVE_VRHADD<MVE_v8u16>;
 defm MVE_VRHADDu32 : MVE_VRHADD<MVE_v4u32>;
 
+def : Pat<(v16i8 (ARMvshrsImm (add (add (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
+                                   (v16i8 (ARMvmovImm (i32 3585)))),
+                              (i32 1))),
+          (MVE_VRHADDs8 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v8i16 (ARMvshrsImm (add (add (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
+                                   (v8i16 (ARMvmovImm (i32 2049)))),
+                              (i32 1))),
+          (MVE_VRHADDs16 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v4i32 (ARMvshrsImm (add (add (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
+                                   (v4i32 (ARMvmovImm (i32 1)))),
+                              (i32 1))),
+          (MVE_VRHADDs32 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v16i8 (ARMvshruImm (add (add (v16i8 MQPR:$Qm), (v16i8 MQPR:$Qn)),
+                                   (v16i8 (ARMvmovImm (i32 3585)))),
+                              (i32 1))),
+          (MVE_VRHADDu8 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v8i16 (ARMvshruImm (add (add (v8i16 MQPR:$Qm), (v8i16 MQPR:$Qn)),
+                                   (v8i16 (ARMvmovImm (i32 2049)))),
+                              (i32 1))),
+          (MVE_VRHADDu16 MQPR:$Qm, MQPR:$Qn)>;
+def : Pat<(v4i32 (ARMvshruImm (add (add (v4i32 MQPR:$Qm), (v4i32 MQPR:$Qn)),
+                                   (v4i32 (ARMvmovImm (i32 1)))),
+                              (i32 1))),
+          (MVE_VRHADDu32 MQPR:$Qm, MQPR:$Qn)>;
+
 class MVE_VHADDSUB<string iname, string suffix, bit U, bit subtract,
                    bits<2> size, list<dag> pattern=[]>
   : MVE_int<iname, suffix, size, pattern> {
Index: llvm/test/CodeGen/Thumb2/mve-halving.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-halving.ll
+++ llvm/test/CodeGen/Thumb2/mve-halving.ll
@@ -109,3 +109,63 @@
   %half = lshr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
   ret <4 x i32> %half
 }
+define arm_aapcs_vfpcc <16 x i8> @vrhadds_v16i8(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: vrhadds_v16i8:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vrhadd.s8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+  %add = add <16 x i8> %x, %y
+  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %half = ashr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %half
+}
+define arm_aapcs_vfpcc <16 x i8> @vrhaddu_v16i8(<16 x i8> %x, <16 x i8> %y) {
+; CHECK-LABEL: vrhaddu_v16i8:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vrhadd.u8 q0, q0, q1
+; CHECK-NEXT:    bx lr
+  %add = add <16 x i8> %x, %y
+  %round = add <16 x i8> %add, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  %half = lshr <16 x i8> %round, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+  ret <16 x i8> %half
+}
+define arm_aapcs_vfpcc <8 x i16> @vrhadds_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: vrhadds_v8i16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vrhadd.s16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+  %add = add <8 x i16> %x, %y
+  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %half = ashr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %half
+}
+define arm_aapcs_vfpcc <8 x i16> @vrhaddu_v8i16(<8 x i16> %x, <8 x i16> %y) {
+; CHECK-LABEL: vrhaddu_v8i16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vrhadd.u16 q0, q0, q1
+; CHECK-NEXT:    bx lr
+  %add = add <8 x i16> %x, %y
+  %round = add <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  %half = lshr <8 x i16> %round, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+  ret <8 x i16> %half
+}
+define arm_aapcs_vfpcc <4 x i32> @vrhadds_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: vrhadds_v4i32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vrhadd.s32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+  %add = add <4 x i32> %x, %y
+  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
+  %half = ashr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %half
+}
+define arm_aapcs_vfpcc <4 x i32> @vrhaddu_v4i32(<4 x i32> %x, <4 x i32> %y) {
+; CHECK-LABEL: vrhaddu_v4i32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vrhadd.u32 q0, q0, q1
+; CHECK-NEXT:    bx lr
+  %add = add <4 x i32> %x, %y
+  %round = add <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
+  %half = lshr <4 x i32> %round, <i32 1, i32 1, i32 1, i32 1>
+  ret <4 x i32> %half
+}
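
Note on the magic numbers in the new patterns: the ARMvmovImm operands (3585,
2049, 1) are the packed vmov modified-immediate encoding, (cmode << 8) | imm8
as built by ARM_AM::createVMOVModImm, and all three decode to a splat of 1 at
the given lane width, i.e. the rounding term in (x + y + 1) >> 1. Below is a
minimal C sketch of that decoding; it is illustrative only and not part of the
patch.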
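    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative sketch, not part of the patch: decode the packed
     * (cmode << 8) | imm8 values used as the ARMvmovImm operands above.
     * The split mirrors ARM_AM::createVMOVModImm's packing. */
    int main(void) {
      const uint32_t enc[] = { 3585, 2049, 1 };           /* v16i8, v8i16, v4i32 */
      const char *vt[]     = { "v16i8", "v8i16", "v4i32" };
      for (int i = 0; i < 3; i++)
        printf("%s: 0x%03x -> cmode=0x%x imm8=0x%02x\n", vt[i],
               (unsigned)enc[i], (unsigned)(enc[i] >> 8),
               (unsigned)(enc[i] & 0xff));
      return 0;
    }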
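This prints cmode=0xe for the byte case (0xe01), cmode=0x8 for the halfword
case (0x801), and cmode=0x0 for the word case (0x001), each with imm8=0x01:
a splat of 1 per element, matching the <i8 1, ...>, <i16 1, ...> and
<i32 1, ...> vectors in the tests. The updated test should run in the usual
way under llvm-lit against the file's existing RUN line, which this hunk does
not show.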