diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td
--- a/llvm/lib/Target/ARM/ARMInstrMVE.td
+++ b/llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -4737,6 +4737,28 @@
             (v16i8 (MVE_VQMOVNu16bh (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
   def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), (i32 1))),
             (v16i8 (MVE_VQMOVNu16th (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm)))>;
+
+  // Fold a right shift by immediate that feeds a saturating narrow (VQMOVN)
+  // into a single VQSHRN. VQSHRN only encodes shifts of 1..16 for 32-bit
+  // sources and 1..8 for 16-bit sources, so match with shr_imm16/shr_imm8;
+  // any other shift amount keeps the separate VSHR + VQMOVN selection.
+  def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshrsImm (v4i32 MQPR:$Qm), shr_imm16:$imm)), (i32 0))),
+            (v8i16 (MVE_VQSHRNbhs32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), shr_imm16:$imm))>;
+  def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshrsImm (v8i16 MQPR:$Qm), shr_imm8:$imm)), (i32 0))),
+            (v16i8 (MVE_VQSHRNbhs16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), shr_imm8:$imm))>;
+  def : Pat<(v8i16 (MVEvqmovns (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshrsImm (v4i32 MQPR:$Qm), shr_imm16:$imm)), (i32 1))),
+            (v8i16 (MVE_VQSHRNths32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), shr_imm16:$imm))>;
+  def : Pat<(v16i8 (MVEvqmovns (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshrsImm (v8i16 MQPR:$Qm), shr_imm8:$imm)), (i32 1))),
+            (v16i8 (MVE_VQSHRNths16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), shr_imm8:$imm))>;
+
+  def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshruImm (v4i32 MQPR:$Qm), shr_imm16:$imm)), (i32 0))),
+            (v8i16 (MVE_VQSHRNbhu32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), shr_imm16:$imm))>;
+  def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshruImm (v8i16 MQPR:$Qm), shr_imm8:$imm)), (i32 0))),
+            (v16i8 (MVE_VQSHRNbhu16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), shr_imm8:$imm))>;
+  def : Pat<(v8i16 (MVEvqmovnu (v8i16 MQPR:$Qd_src), (v4i32 (ARMvshruImm (v4i32 MQPR:$Qm), shr_imm16:$imm)), (i32 1))),
+            (v8i16 (MVE_VQSHRNthu32 (v8i16 MQPR:$Qd_src), (v4i32 MQPR:$Qm), shr_imm16:$imm))>;
+  def : Pat<(v16i8 (MVEvqmovnu (v16i8 MQPR:$Qd_src), (v8i16 (ARMvshruImm (v8i16 MQPR:$Qm), shr_imm8:$imm)), (i32 1))),
+            (v16i8 (MVE_VQSHRNthu16 (v16i8 MQPR:$Qd_src), (v8i16 MQPR:$Qm), shr_imm8:$imm))>;
 }
 
 class MVE_VCVT_ff @vqshrni32_smaxmin(<4 x i32> %so) {
 ; CHECK-LABEL: vqshrni32_smaxmin:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.s32 q0, q0, #3
-; CHECK-NEXT:    vqmovnb.s32 q0, q0
+; CHECK-NEXT:    vqshrnb.s32 q0, q0, #3
 ; CHECK-NEXT:    vmovlb.s16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
@@ -20,8 +19,7 @@
 define arm_aapcs_vfpcc <4 x i32> @vqshrni32_sminmax(<4 x i32> %so) {
 ; CHECK-LABEL: vqshrni32_sminmax:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.s32 q0, q0, #3
-; CHECK-NEXT:    vqmovnb.s32 q0, q0
+; CHECK-NEXT:    vqshrnb.s32 q0, q0, #3
 ; CHECK-NEXT:    vmovlb.s16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
@@ -36,8 +34,7 @@
 define arm_aapcs_vfpcc <4 x i32> @vqshrni32_umaxmin(<4 x i32> %so) {
 ; CHECK-LABEL: vqshrni32_umaxmin:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.u32 q0, q0, #3
-; CHECK-NEXT:    vqmovnb.u32 q0, q0
+; CHECK-NEXT:    vqshrnb.u32 q0, q0, #3
 ; CHECK-NEXT:    vmovlb.u16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
@@ -50,8 +47,7 @@
 define arm_aapcs_vfpcc <4 x i32> @vqshrni32_uminmax(<4 x i32> %so) {
 ; CHECK-LABEL: vqshrni32_uminmax:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.u32 q0, q0, #3
-; CHECK-NEXT:    vqmovnb.u32 q0, q0
+; CHECK-NEXT:    vqshrnb.u32 q0, q0, #3
 ; CHECK-NEXT:    vmovlb.u16 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
@@ -64,8 +60,7 @@
 define arm_aapcs_vfpcc <8 x i16> @vqshrni16_smaxmin(<8 x i16> %so) {
 ; CHECK-LABEL: vqshrni16_smaxmin:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.s16 q0, q0, #3
-; CHECK-NEXT:    vqmovnb.s16 q0, q0
+; CHECK-NEXT:    vqshrnb.s16 q0, q0, #3
 ; CHECK-NEXT:    vmovlb.s8 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
@@ -80,8 +75,7 @@
 define arm_aapcs_vfpcc <8 x i16> @vqshrni16_sminmax(<8 x i16> %so) {
 ; CHECK-LABEL: vqshrni16_sminmax:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.s16 q0, q0, #3
-; CHECK-NEXT:    vqmovnb.s16 q0, q0
+; CHECK-NEXT:    vqshrnb.s16 q0, q0, #3
 ; CHECK-NEXT:    vmovlb.s8 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
@@ -96,8 +90,7 @@
 define arm_aapcs_vfpcc <8 x i16> @vqshrni16_umaxmin(<8 x i16> %so) {
 ; CHECK-LABEL: vqshrni16_umaxmin:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.u16 q0, q0, #3
-; CHECK-NEXT:    vqmovnb.u16 q0, q0
+; CHECK-NEXT:    vqshrnb.u16 q0, q0, #3
 ; CHECK-NEXT:    vmovlb.u8 q0, q0
 ; CHECK-NEXT:    bx lr
 entry:
@@ -110,8 +103,7 @@
 define arm_aapcs_vfpcc <8 x i16> @vqshrni16_uminmax(<8 x i16> %so) {
 ; CHECK-LABEL: vqshrni16_uminmax:
 ; CHECK:       @ %bb.0: @ %entry
-; CHECK-NEXT:    vshr.u16 q0, q0, #3
-; CHECK-NEXT:    vqmovnb.u16 q0, q0
+; CHECK-NEXT:    vqshrnb.u16 q0, q0, #3
 ; CHECK-NEXT:    vmovlb.u8 q0, q0
 ; CHECK-NEXT:    bx lr
 entry: