Index: llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/trunk/lib/Target/ARM/ARMISelLowering.cpp @@ -256,6 +256,13 @@ setOperationAction(ISD::SDIV, VT, Expand); setOperationAction(ISD::UREM, VT, Expand); setOperationAction(ISD::SREM, VT, Expand); + + if (!HasMVEFP) { + setOperationAction(ISD::SINT_TO_FP, VT, Expand); + setOperationAction(ISD::UINT_TO_FP, VT, Expand); + setOperationAction(ISD::FP_TO_SINT, VT, Expand); + setOperationAction(ISD::FP_TO_UINT, VT, Expand); + } } const MVT FloatTypes[] = { MVT::v8f16, MVT::v4f32 }; Index: llvm/trunk/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrMVE.td +++ llvm/trunk/lib/Target/ARM/ARMInstrMVE.td @@ -2546,6 +2546,25 @@ def MVE_VCVTf32s32n : MVE_VCVT_fp_int<"f32.s32", 0b10, 0b00>; def MVE_VCVTf32u32n : MVE_VCVT_fp_int<"f32.u32", 0b10, 0b01>; +let Predicates = [HasMVEFloat] in { +def : Pat<(v4i32 (fp_to_sint (v4f32 MQPR:$src))), + (v4i32 (MVE_VCVTs32f32z (v4f32 MQPR:$src)))>; +def : Pat<(v4i32 (fp_to_uint (v4f32 MQPR:$src))), + (v4i32 (MVE_VCVTu32f32z (v4f32 MQPR:$src)))>; +def : Pat<(v8i16 (fp_to_sint (v8f16 MQPR:$src))), + (v8i16 (MVE_VCVTs16f16z (v8f16 MQPR:$src)))>; +def : Pat<(v8i16 (fp_to_uint (v8f16 MQPR:$src))), + (v8i16 (MVE_VCVTu16f16z (v8f16 MQPR:$src)))>; +def : Pat<(v4f32 (sint_to_fp (v4i32 MQPR:$src))), + (v4f32 (MVE_VCVTf32s32n (v4i32 MQPR:$src)))>; +def : Pat<(v4f32 (uint_to_fp (v4i32 MQPR:$src))), + (v4f32 (MVE_VCVTf32u32n (v4i32 MQPR:$src)))>; +def : Pat<(v8f16 (sint_to_fp (v8i16 MQPR:$src))), + (v8f16 (MVE_VCVTf16s16n (v8i16 MQPR:$src)))>; +def : Pat<(v8f16 (uint_to_fp (v8i16 MQPR:$src))), + (v8f16 (MVE_VCVTf16u16n (v8i16 MQPR:$src)))>; +} + class MVE_VABSNEG_fp<[...] size, bit negate, list<dag> pattern=[]> : MVE_float<[...] [extraction gap: the rest of this hunk and the diff header / RUN lines of the new test file are missing] +define arm_aapcs_vfpcc <4 x float> @foo_float_int32(<4 x i32> %src) { +; CHECK-MVE-LABEL: foo_float_int32: +; CHECK-MVE: @ 
%bb.0: @ %entry +; CHECK-MVE-NEXT: vcvt.f32.s32 s7, s3 +; CHECK-MVE-NEXT: vcvt.f32.s32 s6, s2 +; CHECK-MVE-NEXT: vcvt.f32.s32 s5, s1 +; CHECK-MVE-NEXT: vcvt.f32.s32 s4, s0 +; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: foo_float_int32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcvt.f32.s32 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %out = sitofp <4 x i32> %src to <4 x float> + ret <4 x float> %out +} + +define arm_aapcs_vfpcc <4 x float> @foo_float_uint32(<4 x i32> %src) { +; CHECK-MVE-LABEL: foo_float_uint32: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcvt.f32.u32 s7, s3 +; CHECK-MVE-NEXT: vcvt.f32.u32 s6, s2 +; CHECK-MVE-NEXT: vcvt.f32.u32 s5, s1 +; CHECK-MVE-NEXT: vcvt.f32.u32 s4, s0 +; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: foo_float_uint32: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcvt.f32.u32 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %out = uitofp <4 x i32> %src to <4 x float> + ret <4 x float> %out +} + +define arm_aapcs_vfpcc <4 x i32> @foo_int32_float(<4 x float> %src) { +; CHECK-MVE-LABEL: foo_int32_float: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vcvt.s32.f32 s4, s0 +; CHECK-MVE-NEXT: vcvt.s32.f32 s6, s1 +; CHECK-MVE-NEXT: vcvt.s32.f32 s8, s3 +; CHECK-MVE-NEXT: vcvt.s32.f32 s10, s2 +; CHECK-MVE-NEXT: vmov r0, s4 +; CHECK-MVE-NEXT: vmov.32 q0[0], r0 +; CHECK-MVE-NEXT: vmov r0, s6 +; CHECK-MVE-NEXT: vmov.32 q0[1], r0 +; CHECK-MVE-NEXT: vmov r0, s10 +; CHECK-MVE-NEXT: vmov.32 q0[2], r0 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.32 q0[3], r0 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: foo_int32_float: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcvt.s32.f32 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %out = fptosi <4 x float> %src to <4 x i32> + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <4 x i32> @foo_uint32_float(<4 x float> %src) { +; CHECK-MVE-LABEL: foo_uint32_float: +; CHECK-MVE: @ %bb.0: @ %entry +; 
CHECK-MVE-NEXT: vcvt.u32.f32 s4, s0 +; CHECK-MVE-NEXT: vcvt.u32.f32 s6, s1 +; CHECK-MVE-NEXT: vcvt.u32.f32 s8, s3 +; CHECK-MVE-NEXT: vcvt.u32.f32 s10, s2 +; CHECK-MVE-NEXT: vmov r0, s4 +; CHECK-MVE-NEXT: vmov.32 q0[0], r0 +; CHECK-MVE-NEXT: vmov r0, s6 +; CHECK-MVE-NEXT: vmov.32 q0[1], r0 +; CHECK-MVE-NEXT: vmov r0, s10 +; CHECK-MVE-NEXT: vmov.32 q0[2], r0 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.32 q0[3], r0 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: foo_uint32_float: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcvt.u32.f32 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %out = fptoui <4 x float> %src to <4 x i32> + ret <4 x i32> %out +} + +define arm_aapcs_vfpcc <8 x half> @foo_half_int16(<8 x i16> %src) { +; CHECK-MVE-LABEL: foo_half_int16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] +; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] +; CHECK-MVE-NEXT: sxth r0, r0 +; CHECK-MVE-NEXT: sxth r1, r1 +; CHECK-MVE-NEXT: vmov s4, r0 +; CHECK-MVE-NEXT: vcvt.f16.s32 s4, s4 +; CHECK-MVE-NEXT: vmov r0, s4 +; CHECK-MVE-NEXT: vmov s4, r1 +; CHECK-MVE-NEXT: vcvt.f16.s32 s4, s4 +; CHECK-MVE-NEXT: vmov r1, s4 +; CHECK-MVE-NEXT: vmov.16 q1[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q1[1], r1 +; CHECK-MVE-NEXT: sxth r0, r0 +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: sxth r0, r0 +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: sxth r0, r0 +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: sxth r0, r0 +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 +; 
CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: sxth r0, r0 +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vcvt.f16.s32 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: sxth r0, r0 +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vcvt.f16.s32 s0, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q1[7], r0 +; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: foo_half_int16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcvt.f16.s16 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %out = sitofp <8 x i16> %src to <8 x half> + ret <8 x half> %out +} + +define arm_aapcs_vfpcc <8 x half> @foo_half_uint16(<8 x i16> %src) { +; CHECK-MVE-LABEL: foo_half_uint16: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] +; CHECK-MVE-NEXT: vmov.u16 r1, q0[1] +; CHECK-MVE-NEXT: vmov s4, r0 +; CHECK-MVE-NEXT: vcvt.f16.u32 s4, s4 +; CHECK-MVE-NEXT: vmov r0, s4 +; CHECK-MVE-NEXT: vmov s4, r1 +; CHECK-MVE-NEXT: vcvt.f16.u32 s4, s4 +; CHECK-MVE-NEXT: vmov r1, s4 +; CHECK-MVE-NEXT: vmov.16 q1[0], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov.16 q1[1], r1 +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vcvt.f16.u32 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[2], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vcvt.f16.u32 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[3], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vcvt.f16.u32 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[4], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vcvt.f16.u32 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[5], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; 
CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vcvt.f16.u32 s8, s8 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q1[6], r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vcvt.f16.u32 s0, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q1[7], r0 +; CHECK-MVE-NEXT: vmov q0, q1 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: foo_half_uint16: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcvt.f16.u16 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %out = uitofp <8 x i16> %src to <8 x half> + ret <8 x half> %out +} + +define arm_aapcs_vfpcc <8 x i16> @foo_int16_half(<8 x half> %src) { +; CHECK-MVE-LABEL: foo_int16_half: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s4, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s6, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s10, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s12, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[1] +; CHECK-MVE-NEXT: vmov s14, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov s5, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s4 +; CHECK-MVE-NEXT: vcvt.s32.f16 s6, s6 +; CHECK-MVE-NEXT: vcvt.s32.f16 s8, s8 +; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10 +; CHECK-MVE-NEXT: vcvt.s32.f16 s12, s12 +; CHECK-MVE-NEXT: vcvt.s32.f16 s14, s14 +; CHECK-MVE-NEXT: vcvt.s32.f16 s5, s5 +; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q0[0], r0 +; CHECK-MVE-NEXT: vmov r0, s14 +; CHECK-MVE-NEXT: vmov.16 q0[1], r0 +; CHECK-MVE-NEXT: vmov r0, s5 +; CHECK-MVE-NEXT: vmov.16 q0[2], r0 +; CHECK-MVE-NEXT: vmov r0, s10 +; CHECK-MVE-NEXT: vmov.16 q0[3], r0 +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.16 q0[4], r0 +; CHECK-MVE-NEXT: vmov r0, s6 +; CHECK-MVE-NEXT: 
vmov.16 q0[5], r0 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q0[6], r0 +; CHECK-MVE-NEXT: vmov r0, s4 +; CHECK-MVE-NEXT: vmov.16 q0[7], r0 +; CHECK-MVE-NEXT: bx lr +; +; CHECK-MVEFP-LABEL: foo_int16_half: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcvt.s16.f16 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %out = fptosi <8 x half> %src to <8 x i16> + ret <8 x i16> %out +} + +define arm_aapcs_vfpcc <8 x i16> @foo_uint16_half(<8 x half> %src) { +; CHECK-MVE-LABEL: foo_uint16_half: +; CHECK-MVE: @ %bb.0: @ %entry +; CHECK-MVE-NEXT: vmov.u16 r0, q0[7] +; CHECK-MVE-NEXT: vmov s4, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[5] +; CHECK-MVE-NEXT: vmov s6, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[6] +; CHECK-MVE-NEXT: vmov s8, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[3] +; CHECK-MVE-NEXT: vmov s10, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[4] +; CHECK-MVE-NEXT: vmov s12, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[1] +; CHECK-MVE-NEXT: vmov s14, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[2] +; CHECK-MVE-NEXT: vmov s5, r0 +; CHECK-MVE-NEXT: vmov.u16 r0, q0[0] +; CHECK-MVE-NEXT: vmov s0, r0 +; CHECK-MVE-NEXT: vcvt.s32.f16 s4, s4 +; CHECK-MVE-NEXT: vcvt.s32.f16 s6, s6 +; CHECK-MVE-NEXT: vcvt.s32.f16 s8, s8 +; CHECK-MVE-NEXT: vcvt.s32.f16 s10, s10 +; CHECK-MVE-NEXT: vcvt.s32.f16 s12, s12 +; CHECK-MVE-NEXT: vcvt.s32.f16 s14, s14 +; CHECK-MVE-NEXT: vcvt.s32.f16 s5, s5 +; CHECK-MVE-NEXT: vcvt.s32.f16 s0, s0 +; CHECK-MVE-NEXT: vmov r0, s0 +; CHECK-MVE-NEXT: vmov.16 q0[0], r0 +; CHECK-MVE-NEXT: vmov r0, s14 +; CHECK-MVE-NEXT: vmov.16 q0[1], r0 +; CHECK-MVE-NEXT: vmov r0, s5 +; CHECK-MVE-NEXT: vmov.16 q0[2], r0 +; CHECK-MVE-NEXT: vmov r0, s10 +; CHECK-MVE-NEXT: vmov.16 q0[3], r0 +; CHECK-MVE-NEXT: vmov r0, s12 +; CHECK-MVE-NEXT: vmov.16 q0[4], r0 +; CHECK-MVE-NEXT: vmov r0, s6 +; CHECK-MVE-NEXT: vmov.16 q0[5], r0 +; CHECK-MVE-NEXT: vmov r0, s8 +; CHECK-MVE-NEXT: vmov.16 q0[6], r0 +; CHECK-MVE-NEXT: vmov r0, s4 +; CHECK-MVE-NEXT: vmov.16 q0[7], r0 +; CHECK-MVE-NEXT: bx lr +; +; 
CHECK-MVEFP-LABEL: foo_uint16_half: +; CHECK-MVEFP: @ %bb.0: @ %entry +; CHECK-MVEFP-NEXT: vcvt.u16.f16 q0, q0 +; CHECK-MVEFP-NEXT: bx lr +entry: + %out = fptoui <8 x half> %src to <8 x i16> + ret <8 x i16> %out +}