Index: llvm/lib/Target/ARM/ARMISelLowering.cpp =================================================================== --- llvm/lib/Target/ARM/ARMISelLowering.cpp +++ llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -346,6 +346,10 @@ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VT, Custom); setOperationAction(ISD::BUILD_VECTOR, VT, Custom); } + // We can do bitwise operations on v2i64 vectors + setOperationAction(ISD::AND, MVT::v2i64, Legal); + setOperationAction(ISD::OR, MVT::v2i64, Legal); + setOperationAction(ISD::XOR, MVT::v2i64, Legal); // It is legal to extload from v4i8 to v4i16 or v4i32. addAllExtLoads(MVT::v8i16, MVT::v8i8, Legal); @@ -12890,6 +12894,8 @@ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (!VT.isVector() || !TLI.isTypeLegal(VT)) return SDValue(); + if (ST->hasMVEIntegerOps() && VT == MVT::v2i64) + return SDValue(); int64_t Cnt; Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -1038,6 +1038,8 @@ (v8i16 (MVE_VMVN (v8i16 MQPR:$val1)))>; def : Pat<(v4i32 (vnotq (v4i32 MQPR:$val1))), (v4i32 (MVE_VMVN (v4i32 MQPR:$val1)))>; + def : Pat<(v2i64 (vnotq (v2i64 MQPR:$val1))), + (v2i64 (MVE_VMVN (v2i64 MQPR:$val1)))>; } class MVE_bit_ops<string iname, bits<2> bit_21_20, bit bit_28> @@ -1084,6 +1086,8 @@ (v8i16 (MVE_VAND (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), (v4i32 (MVE_VAND (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))), + (v2i64 (MVE_VAND (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), (v16i8 (MVE_VORR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; @@ -1091,6 +1095,8 @@ (v8i16 (MVE_VORR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), (v4i32 (MVE_VORR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; +
def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))), + (v2i64 (MVE_VORR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; def : Pat<(v16i8 (xor (v16i8 MQPR:$val1), (v16i8 MQPR:$val2))), (v16i8 (MVE_VEOR (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; @@ -1098,6 +1104,8 @@ (v8i16 (MVE_VEOR (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; def : Pat<(v4i32 (xor (v4i32 MQPR:$val1), (v4i32 MQPR:$val2))), (v4i32 (MVE_VEOR (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + def : Pat<(v2i64 (xor (v2i64 MQPR:$val1), (v2i64 MQPR:$val2))), + (v2i64 (MVE_VEOR (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; def : Pat<(v16i8 (and (v16i8 MQPR:$val1), (vnotq MQPR:$val2))), (v16i8 (MVE_VBIC (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; @@ -1105,13 +1113,17 @@ (v8i16 (MVE_VBIC (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; def : Pat<(v4i32 (and (v4i32 MQPR:$val1), (vnotq MQPR:$val2))), (v4i32 (MVE_VBIC (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + def : Pat<(v2i64 (and (v2i64 MQPR:$val1), (vnotq MQPR:$val2))), + (v2i64 (MVE_VBIC (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; - def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq (v16i8 MQPR:$val2)))), + def : Pat<(v16i8 (or (v16i8 MQPR:$val1), (vnotq MQPR:$val2))), (v16i8 (MVE_VORN (v16i8 MQPR:$val1), (v16i8 MQPR:$val2)))>; def : Pat<(v8i16 (or (v8i16 MQPR:$val1), (vnotq MQPR:$val2))), (v8i16 (MVE_VORN (v8i16 MQPR:$val1), (v8i16 MQPR:$val2)))>; def : Pat<(v4i32 (or (v4i32 MQPR:$val1), (vnotq MQPR:$val2))), (v4i32 (MVE_VORN (v4i32 MQPR:$val1), (v4i32 MQPR:$val2)))>; + def : Pat<(v2i64 (or (v2i64 MQPR:$val1), (vnotq MQPR:$val2))), + (v2i64 (MVE_VORN (v2i64 MQPR:$val1), (v2i64 MQPR:$val2)))>; } class MVE_bit_cmode<string iname, string suffix, bits<4> cmode, dag inOps> Index: llvm/test/CodeGen/Thumb2/mve-abs.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-abs.ll +++ llvm/test/CodeGen/Thumb2/mve-abs.ll @@ -36,3 +36,50 @@ %2 = select <4 x i1> %0, <4 x i32> %1, <4 x i32> %s1 ret <4 x i32> %2 } + +define arm_aapcs_vfpcc <2 x i64> @abs_v2i64(<2 x i64> %s1) {
+; CHECK-LABEL: abs_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r6, r7, lr} +; CHECK-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-NEXT: vmov r12, s2 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: vmov r3, s3 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: rsbs.w lr, r12, #0 +; CHECK-NEXT: sbc.w r5, r0, r3 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: mov r2, lr +; CHECK-NEXT: lsrl r2, r5, #32 +; CHECK-NEXT: mov.w r5, #0 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r5, #1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq r2, r3 +; CHECK-NEXT: vmov r3, s1 +; CHECK-NEXT: rsbs r4, r1, #0 +; CHECK-NEXT: mov r6, r4 +; CHECK-NEXT: sbc.w r7, r0, r3 +; CHECK-NEXT: cmp r3, #0 +; CHECK-NEXT: lsrl r6, r7, #32 +; CHECK-NEXT: it mi +; CHECK-NEXT: movmi r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: ite eq +; CHECK-NEXT: moveq r6, r3 +; CHECK-NEXT: movne r1, r4 +; CHECK-NEXT: vmov.32 q0[0], r1 +; CHECK-NEXT: cmp r5, #0 +; CHECK-NEXT: vmov.32 q0[1], r6 +; CHECK-NEXT: it eq +; CHECK-NEXT: moveq lr, r12 +; CHECK-NEXT: vmov.32 q0[2], lr +; CHECK-NEXT: vmov.32 q0[3], r2 +; CHECK-NEXT: pop {r4, r5, r6, r7, pc} +entry: + %0 = icmp slt <2 x i64> %s1, zeroinitializer + %1 = sub nsw <2 x i64> zeroinitializer, %s1 + %2 = select <2 x i1> %0, <2 x i64> %1, <2 x i64> %s1 + ret <2 x i64> %2 +} Index: llvm/test/CodeGen/Thumb2/mve-bitarith.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-bitarith.ll +++ llvm/test/CodeGen/Thumb2/mve-bitarith.ll @@ -31,6 +31,16 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @and_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: and_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vand q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = and <2 x i64> %src1, %src2 + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <16 x i8> @or_int8_t(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: or_int8_t: @@ -62,6 +72,16 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @or_int64_t(<2 x 
i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: or_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = or <2 x i64> %src1, %src2 + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <16 x i8> @xor_int8_t(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: xor_int8_t: @@ -93,6 +113,16 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @xor_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: xor_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: veor q0, q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = xor <2 x i64> %src1, %src2 + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <16 x i8> @v_mvn_i8(<16 x i8> %src) { ; CHECK-LABEL: v_mvn_i8: ; CHECK: @ %bb.0: @ %entry @@ -123,6 +153,17 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @v_mvn_i64(<2 x i64> %src) { +; CHECK-LABEL: v_mvn_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmvn q0, q0 +; CHECK-NEXT: bx lr +entry: + %0 = xor <2 x i64> %src, <i64 -1, i64 -1> + ret <2 x i64> %0 +} + + define arm_aapcs_vfpcc <16 x i8> @v_bic_i8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: v_bic_i8: ; CHECK: @ %bb.0: @ %entry @@ -156,6 +197,18 @@ ret <4 x i32> %1 } +define arm_aapcs_vfpcc <2 x i64> @v_bic_i64(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: v_bic_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vbic q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = xor <2 x i64> %src1, <i64 -1, i64 -1> + %1 = and <2 x i64> %src2, %0 + ret <2 x i64> %1 +} + + define arm_aapcs_vfpcc <16 x i8> @v_or_i8(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: v_or_i8: ; CHECK: @ %bb.0: @ %entry @@ -188,3 +241,15 @@ %1 = or <4 x i32> %src2, %0 ret <4 x i32> %1 } + +define arm_aapcs_vfpcc <2 x i64> @v_or_i64(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: v_or_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vorn q0, q1, q0 +; CHECK-NEXT: bx lr +entry: + %0 = xor <2 x i64> %src1, <i64 -1, i64 -1> + %1 = or <2 x i64> %src2, %0 + ret <2 x i64> %1 +} + Index: llvm/test/CodeGen/Thumb2/mve-div-expand.ll
=================================================================== --- llvm/test/CodeGen/Thumb2/mve-div-expand.ll +++ llvm/test/CodeGen/Thumb2/mve-div-expand.ll @@ -736,6 +736,144 @@ ret <16 x i8> %out } +define arm_aapcs_vfpcc <2 x i64> @udiv_i64(<2 x i64> %in1, <2 x i64> %in2) { +; CHECK-LABEL: udiv_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov q5, q0 +; CHECK-NEXT: vmov r0, s20 +; CHECK-NEXT: vmov r1, s21 +; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: vmov r3, s17 +; CHECK-NEXT: bl __aeabi_uldivmod +; CHECK-NEXT: vmov r12, s22 +; CHECK-NEXT: vmov lr, s23 +; CHECK-NEXT: vmov r2, s18 +; CHECK-NEXT: vmov r3, s19 +; CHECK-NEXT: vmov.32 q4[0], r0 +; CHECK-NEXT: lsrl r0, r1, #32 +; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: mov r1, lr +; CHECK-NEXT: bl __aeabi_uldivmod +; CHECK-NEXT: vmov.32 q4[2], r0 +; CHECK-NEXT: lsrl r0, r1, #32 +; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} +entry: + %out = udiv <2 x i64> %in1, %in2 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @sdiv_i64(<2 x i64> %in1, <2 x i64> %in2) { +; CHECK-LABEL: sdiv_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov q5, q0 +; CHECK-NEXT: vmov r0, s20 +; CHECK-NEXT: vmov r1, s21 +; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: vmov r3, s17 +; CHECK-NEXT: bl __aeabi_ldivmod +; CHECK-NEXT: vmov r12, s22 +; CHECK-NEXT: vmov lr, s23 +; CHECK-NEXT: vmov r2, s18 +; CHECK-NEXT: vmov r3, s19 +; CHECK-NEXT: vmov.32 q4[0], r0 +; CHECK-NEXT: lsrl r0, r1, #32 +; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: mov r0, r12 +; CHECK-NEXT: mov r1, lr +; CHECK-NEXT: bl 
__aeabi_ldivmod +; CHECK-NEXT: vmov.32 q4[2], r0 +; CHECK-NEXT: lsrl r0, r1, #32 +; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} +entry: + %out = sdiv <2 x i64> %in1, %in2 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @urem_i64(<2 x i64> %in1, <2 x i64> %in2) { +; CHECK-LABEL: urem_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov q5, q0 +; CHECK-NEXT: vmov r0, s20 +; CHECK-NEXT: vmov r1, s21 +; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: vmov r3, s17 +; CHECK-NEXT: bl __aeabi_uldivmod +; CHECK-NEXT: vmov r12, s18 +; CHECK-NEXT: vmov lr, s19 +; CHECK-NEXT: vmov.32 q4[0], r2 +; CHECK-NEXT: vmov r0, s22 +; CHECK-NEXT: vmov.32 q4[1], r3 +; CHECK-NEXT: vmov r1, s23 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: mov r3, lr +; CHECK-NEXT: bl __aeabi_uldivmod +; CHECK-NEXT: vmov.32 q4[2], r2 +; CHECK-NEXT: vmov.32 q4[3], r3 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} +entry: + %out = urem <2 x i64> %in1, %in2 + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @srem_i64(<2 x i64> %in1, <2 x i64> %in2) { +; CHECK-LABEL: srem_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov q5, q0 +; CHECK-NEXT: vmov r0, s20 +; CHECK-NEXT: vmov r1, s21 +; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: vmov r3, s17 +; CHECK-NEXT: bl __aeabi_ldivmod +; CHECK-NEXT: vmov r12, s18 +; CHECK-NEXT: vmov lr, s19 +; CHECK-NEXT: vmov.32 q4[0], r2 +; CHECK-NEXT: vmov r0, s22 +; CHECK-NEXT: vmov.32 q4[1], r3 +; CHECK-NEXT: vmov r1, s23 +; CHECK-NEXT: mov r2, r12 +; CHECK-NEXT: mov r3, lr +; CHECK-NEXT: bl __aeabi_ldivmod +; CHECK-NEXT: 
vmov.32 q4[2], r2 +; CHECK-NEXT: vmov.32 q4[3], r3 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} +entry: + %out = srem <2 x i64> %in1, %in2 + ret <2 x i64> %out +} + + + define arm_aapcs_vfpcc <4 x float> @fdiv_f32(<4 x float> %in1, <4 x float> %in2) { ; CHECK-LABEL: fdiv_f32: @@ -992,3 +1130,59 @@ %out = frem <8 x half> %in1, %in2 ret <8 x half> %out } + +define arm_aapcs_vfpcc <2 x double> @fdiv_f64(<2 x double> %in1, <2 x double> %in2) { +; CHECK-LABEL: fdiv_f64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov q5, q0 +; CHECK-NEXT: vmov r0, r1, d11 +; CHECK-NEXT: vmov r2, r3, d9 +; CHECK-NEXT: bl __aeabi_ddiv +; CHECK-NEXT: vmov lr, r12, d10 +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, lr +; CHECK-NEXT: mov r1, r12 +; CHECK-NEXT: bl __aeabi_ddiv +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} +entry: + %out = fdiv <2 x double> %in1, %in2 + ret <2 x double> %out +} + +define arm_aapcs_vfpcc <2 x double> @frem_f64(<2 x double> %in1, <2 x double> %in2) { +; CHECK-LABEL: frem_f64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov q5, q0 +; CHECK-NEXT: vmov r0, r1, d11 +; CHECK-NEXT: vmov r2, r3, d9 +; CHECK-NEXT: bl fmod +; CHECK-NEXT: vmov lr, r12, d10 +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, lr +; CHECK-NEXT: mov r1, r12 +; CHECK-NEXT: bl fmod +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} +entry: + %out = frem <2 x double> %in1, %in2 + ret <2 x double> 
%out +} + + Index: llvm/test/CodeGen/Thumb2/mve-fmath.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fmath.ll +++ llvm/test/CodeGen/Thumb2/mve-fmath.ll @@ -66,6 +66,30 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @sqrt_float64_t(<2 x double> %src) { +; CHECK-LABEL: sqrt_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl sqrt +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl sqrt +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.sqrt.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @cos_float32_t(<4 x float> %src) { ; CHECK-LABEL: cos_float32_t: ; CHECK: @ %bb.0: @ %entry @@ -198,6 +222,30 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @cos_float64_t(<2 x double> %src) { +; CHECK-LABEL: cos_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl cos +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl cos +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.cos.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @sin_float32_t(<4 x float> %src) { ; CHECK-LABEL: sin_float32_t: ; CHECK: @ %bb.0: @ %entry @@ -330,6 +378,30 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> 
@sin_float64_t(<2 x double> %src) { +; CHECK-LABEL: sin_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl sin +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl sin +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.sin.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @exp_float32_t(<4 x float> %src) { ; CHECK-LABEL: exp_float32_t: ; CHECK: @ %bb.0: @ %entry @@ -462,6 +534,30 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @exp_float64_t(<2 x double> %src) { +; CHECK-LABEL: exp_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl exp +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl exp +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.exp.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @exp2_float32_t(<4 x float> %src) { ; CHECK-LABEL: exp2_float32_t: ; CHECK: @ %bb.0: @ %entry @@ -594,6 +690,30 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @exp2_float64_t(<2 x double> %src) { +; CHECK-LABEL: exp2_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl 
exp2 +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl exp2 +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.exp2.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @log_float32_t(<4 x float> %src) { ; CHECK-LABEL: log_float32_t: ; CHECK: @ %bb.0: @ %entry @@ -726,6 +846,30 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @log_float64_t(<2 x double> %src) { +; CHECK-LABEL: log_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl log +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl log +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.log.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @log2_float32_t(<4 x float> %src) { ; CHECK-LABEL: log2_float32_t: ; CHECK: @ %bb.0: @ %entry @@ -858,6 +1002,30 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @log2_float64_t(<2 x double> %src) { +; CHECK-LABEL: log2_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl log2 +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl log2 +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x 
double> @llvm.log2.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @log10_float32_t(<4 x float> %src) { ; CHECK-LABEL: log10_float32_t: ; CHECK: @ %bb.0: @ %entry @@ -990,6 +1158,30 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @log10_float64_t(<2 x double> %src) { +; CHECK-LABEL: log10_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl log10 +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl log10 +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.log10.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @pow_float32_t(<4 x float> %src1, <4 x float> %src2) { ; CHECK-LABEL: pow_float32_t: ; CHECK: @ %bb.0: @ %entry @@ -1165,6 +1357,33 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @pow_float64_t(<2 x double> %src1, <2 x double> %src2) { +; CHECK-LABEL: pow_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov q5, q0 +; CHECK-NEXT: vmov r0, r1, d11 +; CHECK-NEXT: vmov r2, r3, d9 +; CHECK-NEXT: bl pow +; CHECK-NEXT: vmov lr, r12, d10 +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, lr +; CHECK-NEXT: mov r1, r12 +; CHECK-NEXT: bl pow +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.pow.v2f64(<2 x double> %src1, <2 x double> %src2) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc 
<4 x float> @copysign_float32_t(<4 x float> %src1, <4 x float> %src2) { ; CHECK-LABEL: copysign_float32_t: ; CHECK: @ %bb.0: @ %entry @@ -1340,6 +1559,27 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @copysign_float64_t(<2 x double> %src1, <2 x double> %src2) { +; CHECK-LABEL: copysign_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vmov r0, r1, d3 +; CHECK-NEXT: vmov r0, lr, d2 +; CHECK-NEXT: vmov r0, r3, d1 +; CHECK-NEXT: vmov r12, r2, d0 +; CHECK-NEXT: lsrs r1, r1, #31 +; CHECK-NEXT: bfi r3, r1, #31, #1 +; CHECK-NEXT: lsr.w r1, lr, #31 +; CHECK-NEXT: bfi r2, r1, #31, #1 +; CHECK-NEXT: vmov d1, r0, r3 +; CHECK-NEXT: vmov d0, r12, r2 +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.copysign.v2f64(<2 x double> %src1, <2 x double> %src2) + ret <2 x double> %0 +} + declare <4 x float> @llvm.sqrt.v4f32(<4 x float>) declare <4 x float> @llvm.cos.v4f32(<4 x float>) declare <4 x float> @llvm.sin.v4f32(<4 x float>) @@ -1360,4 +1600,14 @@ declare <8 x half> @llvm.log10.v8f16(<8 x half>) declare <8 x half> @llvm.pow.v8f16(<8 x half>, <8 x half>) declare <8 x half> @llvm.copysign.v8f16(<8 x half>, <8 x half>) +declare <2 x double> @llvm.sqrt.v2f64(<2 x double>) +declare <2 x double> @llvm.cos.v2f64(<2 x double>) +declare <2 x double> @llvm.sin.v2f64(<2 x double>) +declare <2 x double> @llvm.exp.v2f64(<2 x double>) +declare <2 x double> @llvm.exp2.v2f64(<2 x double>) +declare <2 x double> @llvm.log.v2f64(<2 x double>) +declare <2 x double> @llvm.log2.v2f64(<2 x double>) +declare <2 x double> @llvm.log10.v2f64(<2 x double>) +declare <2 x double> @llvm.pow.v2f64(<2 x double>, <2 x double>) +declare <2 x double> @llvm.copysign.v2f64(<2 x double>, <2 x double>) Index: llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll +++ llvm/test/CodeGen/Thumb2/mve-fp-negabs.ll @@ -76,6 
+76,39 @@ ret <4 x float> %0 } +define arm_aapcs_vfpcc <2 x double> @fneg_float64_t(<2 x double> %src) { +; CHECK-LABEL: fneg_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vldr d0, .LCPI2_0 +; CHECK-NEXT: vmov r2, r3, d9 +; CHECK-NEXT: vmov r4, r5, d0 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl __aeabi_dsub +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r4 +; CHECK-NEXT: mov r1, r5 +; CHECK-NEXT: bl __aeabi_dsub +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r4, r5, r7, pc} +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI2_0: +; CHECK-NEXT: .long 0 @ double -0 +; CHECK-NEXT: .long 2147483648 +entry: + %0 = fsub nnan ninf nsz <2 x double> , %src + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <8 x half> @fabs_float16_t(<8 x half> %src) { ; CHECK-MVE-LABEL: fabs_float16_t: ; CHECK-MVE: @ %bb.0: @ %entry @@ -150,6 +183,30 @@ ret <4 x float> %0 } +define arm_aapcs_vfpcc <2 x double> @fabs_float64_t(<2 x double> %src) { +; CHECK-LABEL: fabs_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vldr d2, .LCPI5_0 +; CHECK-NEXT: vmov r12, r3, d0 +; CHECK-NEXT: vmov r0, r1, d2 +; CHECK-NEXT: vmov r0, r2, d1 +; CHECK-NEXT: lsrs r1, r1, #31 +; CHECK-NEXT: bfi r2, r1, #31, #1 +; CHECK-NEXT: bfi r3, r1, #31, #1 +; CHECK-NEXT: vmov d1, r0, r2 +; CHECK-NEXT: vmov d0, r12, r3 +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 3 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI5_0: +; CHECK-NEXT: .long 0 @ double 0 +; CHECK-NEXT: .long 0 +entry: + %0 = call nnan ninf nsz <2 x double> @llvm.fabs.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + declare <4 x float> @llvm.fabs.v4f32(<4 x float>) declare <8 x half> @llvm.fabs.v8f16(<8 x half>) +declare <2 x double> 
@llvm.fabs.v2f64(<2 x double>) Index: llvm/test/CodeGen/Thumb2/mve-frint.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-frint.ll +++ llvm/test/CodeGen/Thumb2/mve-frint.ll @@ -76,6 +76,30 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @fceil_float64_t(<2 x double> %src) { +; CHECK-LABEL: fceil_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl ceil +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl ceil +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.ceil.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @ftrunc_float32_t(<4 x float> %src) { ; CHECK-MVE-LABEL: ftrunc_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry @@ -150,6 +174,30 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @ftrunc_float64_t(<2 x double> %src) { +; CHECK-LABEL: ftrunc_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl trunc +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl trunc +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.trunc.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @frint_float32_t(<4 x float> %src) { ; CHECK-MVE-LABEL: frint_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry @@ -224,6 +272,30 @@ ret 
<8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @frint_float64_t(<2 x double> %src) { +; CHECK-LABEL: frint_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl rint +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl rint +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.rint.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @fnearbyint_float32_t(<4 x float> %src) { ; CHECK-LABEL: fnearbyint_float32_t: ; CHECK: @ %bb.0: @ %entry @@ -288,6 +360,30 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @fnearbyint_float64_t(<2 x double> %src) { +; CHECK-LABEL: fnearbyint_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl nearbyint +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.nearbyint.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @ffloor_float32_t(<4 x float> %src) { ; CHECK-MVE-LABEL: ffloor_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry @@ -362,6 +458,30 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @ffloor_float64_t(<2 x double> %src) { +; CHECK-LABEL: ffloor_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: 
.vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl floor +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl floor +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.floor.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <4 x float> @fround_float32_t(<4 x float> %src) { ; CHECK-MVE-LABEL: fround_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry @@ -436,6 +556,30 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @fround_float64_t(<2 x double> %src) { +; CHECK-LABEL: fround_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: bl round +; CHECK-NEXT: vmov r2, r3, d8 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl round +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = call fast <2 x double> @llvm.round.v2f64(<2 x double> %src) + ret <2 x double> %0 +} + declare <4 x float> @llvm.ceil.v4f32(<4 x float>) declare <4 x float> @llvm.trunc.v4f32(<4 x float>) declare <4 x float> @llvm.rint.v4f32(<4 x float>) @@ -448,3 +592,9 @@ declare <8 x half> @llvm.nearbyint.v8f16(<8 x half>) declare <8 x half> @llvm.floor.v8f16(<8 x half>) declare <8 x half> @llvm.round.v8f16(<8 x half>) +declare <2 x double> @llvm.ceil.v2f64(<2 x double>) +declare <2 x double> @llvm.trunc.v2f64(<2 x double>) +declare <2 x double> @llvm.rint.v2f64(<2 x double>) +declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>) +declare <2 x double> @llvm.floor.v2f64(<2 x double>) +declare <2 x double> @llvm.round.v2f64(<2 x 
double>) Index: llvm/test/CodeGen/Thumb2/mve-minmax.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-minmax.ll +++ llvm/test/CodeGen/Thumb2/mve-minmax.ll @@ -35,6 +35,49 @@ ret <4 x i32> %1 } +define arm_aapcs_vfpcc <2 x i64> @smin_v2i64(<2 x i64> %s1, <2 x i64> %s2) { +; CHECK-LABEL: smin_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vmov r2, s6 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: vmov r12, s7 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: vmov lr, s1 +; CHECK-NEXT: subs r2, r3, r2 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: sbcs.w r1, r1, r12 +; CHECK-NEXT: vmov r12, s5 +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r1, #-1 +; CHECK-NEXT: subs r2, r3, r2 +; CHECK-NEXT: sbcs.w r2, lr, r12 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vbic q1, q1, q2 +; CHECK-NEXT: vand q0, q0, q2 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = icmp slt <2 x i64> %s1, %s2 + %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2 + ret <2 x i64> %1 +} + define arm_aapcs_vfpcc <16 x i8> @umin_v16i8(<16 x i8> %s1, <16 x i8> %s2) { ; CHECK-LABEL: umin_v16i8: ; CHECK: @ %bb.0: @ %entry @@ -68,6 +111,49 @@ ret <4 x i32> %1 } +define arm_aapcs_vfpcc <2 x i64> @umin_v2i64(<2 x i64> %s1, <2 x i64> %s2) { +; CHECK-LABEL: umin_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vmov r2, s6 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: vmov r12, s7 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: vmov lr, s1 
+; CHECK-NEXT: subs r2, r3, r2 +; CHECK-NEXT: vmov r3, s0 +; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: sbcs.w r1, r1, r12 +; CHECK-NEXT: vmov r12, s5 +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r1, #-1 +; CHECK-NEXT: subs r2, r3, r2 +; CHECK-NEXT: sbcs.w r2, lr, r12 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vbic q1, q1, q2 +; CHECK-NEXT: vand q0, q0, q2 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = icmp ult <2 x i64> %s1, %s2 + %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2 + ret <2 x i64> %1 +} + define arm_aapcs_vfpcc <16 x i8> @smax_v16i8(<16 x i8> %s1, <16 x i8> %s2) { ; CHECK-LABEL: smax_v16i8: @@ -102,6 +188,49 @@ ret <4 x i32> %1 } +define arm_aapcs_vfpcc <2 x i64> @smax_v2i64(<2 x i64> %s1, <2 x i64> %s2) { +; CHECK-LABEL: smax_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: vmov r3, s6 +; CHECK-NEXT: vmov r12, s3 +; CHECK-NEXT: vmov r1, s7 +; CHECK-NEXT: vmov lr, s5 +; CHECK-NEXT: subs r2, r3, r2 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: sbcs.w r1, r1, r12 +; CHECK-NEXT: vmov r12, s1 +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r1, #-1 +; CHECK-NEXT: subs r2, r3, r2 +; CHECK-NEXT: sbcs.w r2, lr, r12 +; CHECK-NEXT: it lt +; CHECK-NEXT: movlt r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.32 q2[3], r1 +; 
CHECK-NEXT: vbic q1, q1, q2 +; CHECK-NEXT: vand q0, q0, q2 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = icmp sgt <2 x i64> %s1, %s2 + %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2 + ret <2 x i64> %1 +} + define arm_aapcs_vfpcc <16 x i8> @umax_v16i8(<16 x i8> %s1, <16 x i8> %s2) { ; CHECK-LABEL: umax_v16i8: ; CHECK: @ %bb.0: @ %entry @@ -135,6 +264,49 @@ ret <4 x i32> %1 } +define arm_aapcs_vfpcc <2 x i64> @umax_v2i64(<2 x i64> %s1, <2 x i64> %s2) { +; CHECK-LABEL: umax_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: movs r0, #0 +; CHECK-NEXT: vmov r3, s6 +; CHECK-NEXT: vmov r12, s3 +; CHECK-NEXT: vmov r1, s7 +; CHECK-NEXT: vmov lr, s5 +; CHECK-NEXT: subs r2, r3, r2 +; CHECK-NEXT: vmov r3, s4 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: sbcs.w r1, r1, r12 +; CHECK-NEXT: vmov r12, s1 +; CHECK-NEXT: mov.w r1, #0 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r1, #1 +; CHECK-NEXT: cmp r1, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r1, #-1 +; CHECK-NEXT: subs r2, r3, r2 +; CHECK-NEXT: sbcs.w r2, lr, r12 +; CHECK-NEXT: it lo +; CHECK-NEXT: movlo r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: vmov.32 q2[0], r0 +; CHECK-NEXT: vmov.32 q2[1], r0 +; CHECK-NEXT: vmov.32 q2[2], r1 +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vbic q1, q1, q2 +; CHECK-NEXT: vand q0, q0, q2 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = icmp ugt <2 x i64> %s1, %s2 + %1 = select <2 x i1> %0, <2 x i64> %s1, <2 x i64> %s2 + ret <2 x i64> %1 +} + define arm_aapcs_vfpcc <4 x float> @maxnm_float32_t(<4 x float> %src1, <4 x float> %src2) { ; CHECK-MVE-LABEL: maxnm_float32_t: @@ -227,3 +399,46 @@ %0 = select <8 x i1> %cmp, <8 x half> %src1, <8 x half> %src2 ret <8 x half> %0 } + +define arm_aapcs_vfpcc <2 x double> @maxnm_float64_t(<2 x double> %src1, <2 x double> %src2) { +; CHECK-LABEL: maxnm_float64_t: 
+; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, lr} +; CHECK-NEXT: push {r4, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov q5, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: vmov r2, r3, d11 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: mov r4, r0 +; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: vmov r2, r3, d10 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r4, #1 +; CHECK-NEXT: cmp r4, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r4, #-1 +; CHECK-NEXT: bl __aeabi_dcmpgt +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne r0, #1 +; CHECK-NEXT: cmp r0, #0 +; CHECK-NEXT: it ne +; CHECK-NEXT: movne.w r0, #-1 +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmov.32 q0[1], r0 +; CHECK-NEXT: vmov.32 q0[2], r4 +; CHECK-NEXT: vmov.32 q0[3], r4 +; CHECK-NEXT: vbic q1, q5, q0 +; CHECK-NEXT: vand q0, q4, q0 +; CHECK-NEXT: vorr q0, q0, q1 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r4, pc} +entry: + %cmp = fcmp fast ogt <2 x double> %src2, %src1 + %0 = select <2 x i1> %cmp, <2 x double> %src2, <2 x double> %src1 + ret <2 x double> %0 +} Index: llvm/test/CodeGen/Thumb2/mve-neg.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-neg.ll +++ llvm/test/CodeGen/Thumb2/mve-neg.ll @@ -30,3 +30,26 @@ %0 = sub nsw <4 x i32> zeroinitializer, %s1 ret <4 x i32> %0 } + +define arm_aapcs_vfpcc <2 x i64> @neg_v2i64(<2 x i64> %s1) { +; CHECK-LABEL: neg_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: mov.w r12, #0 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r3, s1 +; CHECK-NEXT: rsbs r1, r1, #0 +; CHECK-NEXT: sbc.w r0, r12, r0 +; CHECK-NEXT: rsbs r2, r2, #0 +; CHECK-NEXT: sbc.w r3, r12, r3 +; CHECK-NEXT: vmov.32 q0[0], r2 +; CHECK-NEXT: vmov.32 q0[1], r3 +; CHECK-NEXT: vmov.32 q0[2], r1 +; CHECK-NEXT: vmov.32 q0[3], r0 +; CHECK-NEXT: bx lr 
+entry: + %0 = sub nsw <2 x i64> zeroinitializer, %s1 + ret <2 x i64> %0 +} + Index: llvm/test/CodeGen/Thumb2/mve-sext.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-sext.ll +++ llvm/test/CodeGen/Thumb2/mve-sext.ll @@ -32,6 +32,27 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @sext_v2i32_v2i64(<2 x i32> %src) { +; CHECK-LABEL: sext_v2i32_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 s2, s1 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: lsll r0, r1, #32 +; CHECK-NEXT: asrl r0, r1, #32 +; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov.32 q1[1], r1 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: lsll r0, r1, #32 +; CHECK-NEXT: asrl r0, r1, #32 +; CHECK-NEXT: vmov.32 q1[2], r0 +; CHECK-NEXT: vmov.32 q1[3], r1 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = sext <2 x i32> %src to <2 x i64> + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <8 x i16> @zext_v8i8_v8i16(<8 x i8> %src) { ; CHECK-LABEL: zext_v8i8_v8i16: @@ -64,6 +85,19 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @zext_v2i32_v2i64(<2 x i32> %src) { +; CHECK-LABEL: zext_v2i32_v2i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i32 q1, #0x0 +; CHECK-NEXT: vmov.f32 s4, s0 +; CHECK-NEXT: vmov.f32 s6, s1 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = zext <2 x i32> %src to <2 x i64> + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <8 x i8> @trunc_v8i16_v8i8(<8 x i16> %src) { ; CHECK-LABEL: trunc_v8i16_v8i8: @@ -91,3 +125,14 @@ %0 = trunc <4 x i32> %src to <4 x i8> ret <4 x i8> %0 } + +define arm_aapcs_vfpcc <2 x i32> @trunc_v2i64_v2i32(<2 x i64> %src) { +; CHECK-LABEL: trunc_v2i64_v2i32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 s1, s2 +; CHECK-NEXT: bx lr +entry: + %0 = trunc <2 x i64> %src to <2 x i32> + ret <2 x i32> %0 +} + Index: llvm/test/CodeGen/Thumb2/mve-shifts.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-shifts.ll +++ 
llvm/test/CodeGen/Thumb2/mve-shifts.ll @@ -31,6 +31,28 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @shl_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: shl_qq_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: lsll r2, r1, r0 +; CHECK-NEXT: vmov r0, s6 +; CHECK-NEXT: vmov.32 q2[0], r2 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: lsll r2, r1, r0 +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %0 = shl <2 x i64> %src1, %src2 + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <16 x i8> @shru_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: shru_qq_int8_t: @@ -65,6 +87,29 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @shru_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: shru_qq_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r3, s3 +; CHECK-NEXT: rsbs r0, r0, #0 +; CHECK-NEXT: lsll r2, r1, r0 +; CHECK-NEXT: vmov r0, s6 +; CHECK-NEXT: rsb.w r12, r0, #0 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov.32 q0[0], r2 +; CHECK-NEXT: lsll r0, r3, r12 +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: vmov.32 q0[2], r0 +; CHECK-NEXT: vmov.32 q0[3], r3 +; CHECK-NEXT: bx lr +entry: + %0 = lshr <2 x i64> %src1, %src2 + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <16 x i8> @shrs_qq_int8_t(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: shrs_qq_int8_t: @@ -99,6 +144,28 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @shrs_qq_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: shrs_qq_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: asrl r2, r1, r0 +; CHECK-NEXT: vmov r0, s6 +; CHECK-NEXT: vmov.32 q2[0], r2 +; CHECK-NEXT: vmov 
r2, s2 +; CHECK-NEXT: vmov.32 q2[1], r1 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: asrl r2, r1, r0 +; CHECK-NEXT: vmov.32 q2[2], r2 +; CHECK-NEXT: vmov.32 q2[3], r1 +; CHECK-NEXT: vmov q0, q2 +; CHECK-NEXT: bx lr +entry: + %0 = ashr <2 x i64> %src1, %src2 + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <16 x i8> @shl_qi_int8_t(<16 x i8> %src1) { ; CHECK-LABEL: shl_qi_int8_t: @@ -130,6 +197,26 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @shl_qi_int64_t(<2 x i64> %src1) { +; CHECK-LABEL: shl_qi_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: lsll r0, r1, #4 +; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov.32 q1[1], r1 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: lsll r0, r1, #4 +; CHECK-NEXT: vmov.32 q1[2], r0 +; CHECK-NEXT: vmov.32 q1[3], r1 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = shl <2 x i64> %src1, + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <16 x i8> @shru_qi_int8_t(<16 x i8> %src1) { ; CHECK-LABEL: shru_qi_int8_t: @@ -161,6 +248,26 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @shru_qi_int64_t(<2 x i64> %src1) { +; CHECK-LABEL: shru_qi_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: lsrl r0, r1, #4 +; CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov.32 q1[1], r1 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: lsrl r0, r1, #4 +; CHECK-NEXT: vmov.32 q1[2], r0 +; CHECK-NEXT: vmov.32 q1[3], r1 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = lshr <2 x i64> %src1, + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <16 x i8> @shrs_qi_int8_t(<16 x i8> %src1) { ; CHECK-LABEL: shrs_qi_int8_t: @@ -192,6 +299,25 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @shrs_qi_int64_t(<2 x i64> %src1) { +; CHECK-LABEL: shrs_qi_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: asrl r0, r1, #4 +; 
CHECK-NEXT: vmov.32 q1[0], r0 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov.32 q1[1], r1 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: asrl r0, r1, #4 +; CHECK-NEXT: vmov.32 q1[2], r0 +; CHECK-NEXT: vmov.32 q1[3], r1 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %0 = ashr <2 x i64> %src1, + ret <2 x i64> %0 +} define arm_aapcs_vfpcc <16 x i8> @shl_qr_int8_t(<16 x i8> %src1, i8 %src2) { @@ -230,6 +356,28 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @shl_qr_int64_t(<2 x i64> %src1, i64 %src2) { +; CHECK-LABEL: shl_qr_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: lsll r2, r1, r0 +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: vmov.32 q1[1], r1 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: lsll r2, r1, r0 +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.32 q1[3], r1 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <2 x i64> undef, i64 %src2, i32 0 + %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer + %0 = shl <2 x i64> %src1, %s + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <16 x i8> @shru_qr_int8_t(<16 x i8> %src1, i8 %src2) { ; CHECK-LABEL: shru_qr_int8_t: @@ -273,6 +421,28 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @shru_qr_int64_t(<2 x i64> %src1, i64 %src2) { +; CHECK-LABEL: shru_qr_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: rsb.w r12, r0, #0 +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: lsll r2, r1, r12 +; CHECK-NEXT: vmov r3, s3 +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov.32 q0[0], r2 +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: lsll r0, r3, r12 +; CHECK-NEXT: vmov.32 q0[2], r0 +; CHECK-NEXT: vmov.32 q0[3], r3 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <2 x i64> undef, i64 %src2, i32 0 + %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer + %0 = lshr <2 x i64> %src1, %s + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <16 x 
i8> @shrs_qr_int8_t(<16 x i8> %src1, i8 %src2) { ; CHECK-LABEL: shrs_qr_int8_t: @@ -316,4 +486,25 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @shrs_qr_int64_t(<2 x i64> %src1, i64 %src2) { +; CHECK-LABEL: shrs_qr_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: asrl r2, r1, r0 +; CHECK-NEXT: vmov.32 q1[0], r2 +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: vmov.32 q1[1], r1 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: asrl r2, r1, r0 +; CHECK-NEXT: vmov.32 q1[2], r2 +; CHECK-NEXT: vmov.32 q1[3], r1 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %i = insertelement <2 x i64> undef, i64 %src2, i32 0 + %s = shufflevector <2 x i64> %i, <2 x i64> undef, <2 x i32> zeroinitializer + %0 = ashr <2 x i64> %src1, %s + ret <2 x i64> %0 +} Index: llvm/test/CodeGen/Thumb2/mve-shuffle.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-shuffle.ll +++ llvm/test/CodeGen/Thumb2/mve-shuffle.ll @@ -262,6 +262,38 @@ ret <16 x i8> %out } +define arm_aapcs_vfpcc <2 x i64> @shuffle1_i64(<2 x i64> %src) { +; CHECK-LABEL: shuffle1_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @shuffle2_i64(<2 x i64> %src) { +; CHECK-LABEL: shuffle2_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: vmov.f32 s5, s3 +; CHECK-NEXT: vmov.f32 s6, s0 +; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @shuffle3_i64(<2 x i64> %src) { +; CHECK-LABEL: shuffle3_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> + ret <2 x i64> %out +} + define arm_aapcs_vfpcc <4 x float> @shuffle1_f32(<4 x float> 
%src) { ; CHECK-LABEL: shuffle1_f32: ; CHECK: @ %bb.0: @ %entry @@ -390,6 +422,38 @@ ret <8 x half> %out } +define arm_aapcs_vfpcc <2 x double> @shuffle1_f64(<2 x double> %src) { +; CHECK-LABEL: shuffle1_f64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> + ret <2 x double> %out +} + +define arm_aapcs_vfpcc <2 x double> @shuffle2_f64(<2 x double> %src) { +; CHECK-LABEL: shuffle2_f64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 s4, s2 +; CHECK-NEXT: vmov.f32 s5, s3 +; CHECK-NEXT: vmov.f32 s6, s0 +; CHECK-NEXT: vmov.f32 s7, s1 +; CHECK-NEXT: vmov q0, q1 +; CHECK-NEXT: bx lr +entry: + %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> + ret <2 x double> %out +} + +define arm_aapcs_vfpcc <2 x double> @shuffle3_f64(<2 x double> %src) { +; CHECK-LABEL: shuffle3_f64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: bx lr +entry: + %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> + ret <2 x double> %out +} + define arm_aapcs_vfpcc <4 x i32> @insert_i32(i32 %a) { ; CHECK-LABEL: insert_i32: @@ -421,6 +485,17 @@ ret <16 x i8> %res } +define arm_aapcs_vfpcc <2 x i64> @insert_i64(i64 %a) { +; CHECK-LABEL: insert_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: bx lr +entry: + %res = insertelement <2 x i64> undef, i64 %a, i32 0 + ret <2 x i64> %res +} + define arm_aapcs_vfpcc <4 x float> @insert_f32(float %a) { ; CHECK-LABEL: insert_f32: ; CHECK: @ %bb.0: @ %entry @@ -443,12 +518,35 @@ ret <8 x half> %res } +define arm_aapcs_vfpcc <2 x double> @insert_f64(double %a) { +; CHECK-LABEL: insert_f64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r6, r7, lr} +; CHECK-NEXT: push {r4, r6, r7, lr} +; CHECK-NEXT: .setfp r7, sp, #8 +; CHECK-NEXT: add r7, sp, #8 +; CHECK-NEXT: .pad #16 +; CHECK-NEXT: sub sp, #16 +; CHECK-NEXT: mov r4, sp +; CHECK-NEXT: bfc r4, #0, #4 +; CHECK-NEXT: mov sp, r4 +; 
CHECK-NEXT: sub.w r4, r7, #8 +; CHECK-NEXT: vstr d0, [sp] +; CHECK-NEXT: mov r0, sp +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: mov sp, r4 +; CHECK-NEXT: pop {r4, r6, r7, pc} +entry: + %res = insertelement <2 x double> undef, double %a, i32 0 + ret <2 x double> %res +} + define arm_aapcs_vfpcc i64 @scalar_to_vector_i32(<8 x i16> %v) { ; CHECK-LABEL: scalar_to_vector_i32: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: .pad #8 ; CHECK-NEXT: sub sp, #8 -; CHECK-NEXT: adr r1, .LCPI30_0 +; CHECK-NEXT: adr r1, .LCPI38_0 ; CHECK-NEXT: vmov.u16 r0, q0[0] ; CHECK-NEXT: vldrw.u32 q1, [r1] ; CHECK-NEXT: vmov.32 q0[0], r0 @@ -461,7 +559,7 @@ ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI30_0: +; CHECK-NEXT: .LCPI38_0: ; CHECK-NEXT: .zero 4 ; CHECK-NEXT: .long 7 @ 0x7 ; CHECK-NEXT: .long 1 @ 0x1 @@ -533,6 +631,28 @@ ret i8 %res } +define arm_aapcs_vfpcc i64 @extract_i64_0(<2 x i64> %a) { +; CHECK-LABEL: extract_i64_0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: bx lr +entry: + %res = extractelement <2 x i64> %a, i32 0 + ret i64 %res +} + +define arm_aapcs_vfpcc i64 @extract_i64_1(<2 x i64> %a) { +; CHECK-LABEL: extract_i64_1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov r0, s2 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: bx lr +entry: + %res = extractelement <2 x i64> %a, i32 1 + ret i64 %res +} + define arm_aapcs_vfpcc float @extract_f32_0(<4 x float> %a) { ; CHECK-LABEL: extract_f32_0: ; CHECK: @ %bb.0: @ %entry @@ -576,3 +696,25 @@ %res = extractelement <8 x half> %a, i32 3 ret half %res } + +define arm_aapcs_vfpcc double @extract_f64_0(<2 x double> %a) { +; CHECK-LABEL: extract_f64_0: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: @ kill: def $d0 killed $d0 killed $q0 +; CHECK-NEXT: bx lr +entry: + %res = extractelement <2 x double> %a, i32 0 + ret double %res +} + +define arm_aapcs_vfpcc double @extract_f64_1(<2 x double> %a) { +; CHECK-LABEL: extract_f64_1: +; CHECK: @ %bb.0: @ 
%entry +; CHECK-NEXT: vmov.f32 s0, s2 +; CHECK-NEXT: vmov.f32 s1, s3 +; CHECK-NEXT: bx lr +entry: + %res = extractelement <2 x double> %a, i32 1 + ret double %res +} + Index: llvm/test/CodeGen/Thumb2/mve-simple-arith.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-simple-arith.ll +++ llvm/test/CodeGen/Thumb2/mve-simple-arith.ll @@ -32,6 +32,33 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @add_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: add_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vmov r2, s6 +; CHECK-NEXT: vmov r3, s2 +; CHECK-NEXT: vmov r0, s7 +; CHECK-NEXT: vmov r1, s3 +; CHECK-NEXT: adds.w lr, r3, r2 +; CHECK-NEXT: vmov r2, s0 +; CHECK-NEXT: vmov r3, s1 +; CHECK-NEXT: adc.w r12, r1, r0 +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vmov r1, s5 +; CHECK-NEXT: adds r0, r0, r2 +; CHECK-NEXT: adcs r1, r3 +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: vmov.32 q0[2], lr +; CHECK-NEXT: vmov.32 q0[3], r12 +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = add nsw <2 x i64> %src1, %src2 + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <4 x float> @add_float32_t(<4 x float> %src1, <4 x float> %src2) { ; CHECK-MVE-LABEL: add_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry @@ -122,6 +149,33 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @add_float64_t(<2 x double> %src1, <2 x double> %src2) { +; CHECK-LABEL: add_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov q5, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: vmov r2, r3, d11 +; CHECK-NEXT: bl __aeabi_dadd +; CHECK-NEXT: vmov lr, r12, d8 +; CHECK-NEXT: vmov r2, r3, d10 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, lr +; CHECK-NEXT: mov r1, r12 +; CHECK-NEXT: bl 
__aeabi_dadd +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = fadd nnan ninf nsz <2 x double> %src2, %src1 + ret <2 x double> %0 +} + define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: sub_int8_t: @@ -153,6 +207,33 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @sub_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: sub_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: vmov r2, s2 +; CHECK-NEXT: vmov r3, s6 +; CHECK-NEXT: vmov r0, s3 +; CHECK-NEXT: vmov r1, s7 +; CHECK-NEXT: subs.w lr, r3, r2 +; CHECK-NEXT: vmov r2, s4 +; CHECK-NEXT: vmov r3, s5 +; CHECK-NEXT: sbc.w r12, r1, r0 +; CHECK-NEXT: vmov r0, s0 +; CHECK-NEXT: vmov r1, s1 +; CHECK-NEXT: subs r0, r2, r0 +; CHECK-NEXT: sbc.w r1, r3, r1 +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: vmov.32 q0[2], lr +; CHECK-NEXT: vmov.32 q0[3], r12 +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = sub nsw <2 x i64> %src2, %src1 + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <4 x float> @sub_float32_t(<4 x float> %src1, <4 x float> %src2) { ; CHECK-MVE-LABEL: sub_float32_t: ; CHECK-MVE: @ %bb.0: @ %entry @@ -243,6 +324,34 @@ ret <8 x half> %0 } +define arm_aapcs_vfpcc <2 x double> @sub_float64_t(<2 x double> %src1, <2 x double> %src2) { +; CHECK-LABEL: sub_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov q5, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: vmov r2, r3, d11 +; CHECK-NEXT: bl __aeabi_dsub +; CHECK-NEXT: vmov lr, r12, d8 +; CHECK-NEXT: vmov r2, r3, d10 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, lr +; CHECK-NEXT: mov r1, r12 +; CHECK-NEXT: bl __aeabi_dsub +; CHECK-NEXT: vmov d8, r0, r1 +; 
CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = fsub nnan ninf nsz <2 x double> %src2, %src1 + ret <2 x double> %0 +} + + define arm_aapcs_vfpcc <16 x i8> @mul_int8_t(<16 x i8> %src1, <16 x i8> %src2) { ; CHECK-LABEL: mul_int8_t: ; CHECK: @ %bb.0: @ %entry @@ -273,6 +382,35 @@ ret <4 x i32> %0 } +define arm_aapcs_vfpcc <2 x i64> @mul_int64_t(<2 x i64> %src1, <2 x i64> %src2) { +; CHECK-LABEL: mul_int64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r4, r5, r7, lr} +; CHECK-NEXT: push {r4, r5, r7, lr} +; CHECK-NEXT: vmov r0, s4 +; CHECK-NEXT: vmov r1, s0 +; CHECK-NEXT: vmov r2, s5 +; CHECK-NEXT: umull r12, r3, r1, r0 +; CHECK-NEXT: mla lr, r1, r2, r3 +; CHECK-NEXT: vmov r3, s6 +; CHECK-NEXT: vmov r1, s2 +; CHECK-NEXT: vmov r2, s7 +; CHECK-NEXT: umull r4, r5, r1, r3 +; CHECK-NEXT: mla r1, r1, r2, r5 +; CHECK-NEXT: vmov r2, s1 +; CHECK-NEXT: mla r0, r2, r0, lr +; CHECK-NEXT: vmov r2, s3 +; CHECK-NEXT: vmov.32 q0[0], r12 +; CHECK-NEXT: vmov.32 q0[1], r0 +; CHECK-NEXT: vmov.32 q0[2], r4 +; CHECK-NEXT: mla r1, r2, r3, r1 +; CHECK-NEXT: vmov.32 q0[3], r1 +; CHECK-NEXT: pop {r4, r5, r7, pc} +entry: + %0 = mul nsw <2 x i64> %src1, %src2 + ret <2 x i64> %0 +} + define arm_aapcs_vfpcc <8 x half> @mul_float16_t(<8 x half> %src1, <8 x half> %src2) { ; CHECK-MVE-LABEL: mul_float16_t: ; CHECK-MVE: @ %bb.0: @ %entry @@ -362,3 +500,31 @@ %0 = fmul nnan ninf nsz <4 x float> %src2, %src1 ret <4 x float> %0 } + +define arm_aapcs_vfpcc <2 x double> @mul_float64_t(<2 x double> %src1, <2 x double> %src2) { +; CHECK-LABEL: mul_float64_t: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9, d10, d11} +; CHECK-NEXT: vpush {d8, d9, d10, d11} +; CHECK-NEXT: vmov q4, q1 +; CHECK-NEXT: vmov q5, q0 +; CHECK-NEXT: vmov r0, r1, d9 +; CHECK-NEXT: vmov r2, r3, d11 +; CHECK-NEXT: bl __aeabi_dmul +; CHECK-NEXT: vmov lr, r12, d8 +; CHECK-NEXT: vmov r2, r3, d10 +; CHECK-NEXT: 
vmov d9, r0, r1 +; CHECK-NEXT: mov r0, lr +; CHECK-NEXT: mov r1, r12 +; CHECK-NEXT: bl __aeabi_dmul +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9, d10, d11} +; CHECK-NEXT: pop {r7, pc} +entry: + %0 = fmul nnan ninf nsz <2 x double> %src2, %src1 + ret <2 x double> %0 +} + Index: llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll +++ llvm/test/CodeGen/Thumb2/mve-soft-float-abi.ll @@ -50,6 +50,39 @@ ret <4 x i32> %sum } +define <2 x i64> @vector_add_i64(<2 x i64> %lhs, <2 x i64> %rhs) { +; CHECK-FP-LABEL: vector_add_i64: +; CHECK-FP: @ %bb.0: @ %entry +; CHECK-FP-NEXT: .save {r7, lr} +; CHECK-FP-NEXT: push {r7, lr} +; CHECK-FP-NEXT: vmov d1, r2, r3 +; CHECK-FP-NEXT: vmov d0, r0, r1 +; CHECK-FP-NEXT: add r0, sp, #8 +; CHECK-FP-NEXT: vldrw.u32 q1, [r0] +; CHECK-FP-NEXT: vmov r1, s2 +; CHECK-FP-NEXT: vmov r0, s3 +; CHECK-FP-NEXT: vmov r3, s6 +; CHECK-FP-NEXT: vmov r2, s7 +; CHECK-FP-NEXT: adds.w lr, r1, r3 +; CHECK-FP-NEXT: vmov r3, s0 +; CHECK-FP-NEXT: vmov r1, s4 +; CHECK-FP-NEXT: adc.w r12, r0, r2 +; CHECK-FP-NEXT: vmov r2, s1 +; CHECK-FP-NEXT: vmov r0, s5 +; CHECK-FP-NEXT: adds r1, r1, r3 +; CHECK-FP-NEXT: vmov.32 q0[0], r1 +; CHECK-FP-NEXT: adcs r0, r2 +; CHECK-FP-NEXT: vmov.32 q0[1], r0 +; CHECK-FP-NEXT: vmov.32 q0[2], lr +; CHECK-FP-NEXT: vmov.32 q0[3], r12 +; CHECK-FP-NEXT: vmov r0, r1, d0 +; CHECK-FP-NEXT: vmov r2, r3, d1 +; CHECK-FP-NEXT: pop {r7, pc} +entry: + %sum = add <2 x i64> %lhs, %rhs + ret <2 x i64> %sum +} + define <8 x half> @vector_add_f16(<8 x half> %lhs, <8 x half> %rhs) { ; CHECK-FP-LABEL: vector_add_f16: ; CHECK-FP: @ %bb.0: @ %entry @@ -81,3 +114,38 @@ %sum = fadd <4 x float> %lhs, %rhs ret <4 x float> %sum } + +define <2 x double> @vector_add_f64(<2 x double> %lhs, <2 x double> %rhs) { +; CHECK-FP-LABEL: vector_add_f64: +; CHECK-FP: @ %bb.0: @ %entry +; CHECK-FP-NEXT: .save {r4, r5, r6, r7, 
lr} +; CHECK-FP-NEXT: push {r4, r5, r6, r7, lr} +; CHECK-FP-NEXT: .pad #4 +; CHECK-FP-NEXT: sub sp, #4 +; CHECK-FP-NEXT: .vsave {d8, d9} +; CHECK-FP-NEXT: vpush {d8, d9} +; CHECK-FP-NEXT: mov r5, r0 +; CHECK-FP-NEXT: add r0, sp, #40 +; CHECK-FP-NEXT: vldrw.u32 q4, [r0] +; CHECK-FP-NEXT: mov r4, r2 +; CHECK-FP-NEXT: mov r6, r3 +; CHECK-FP-NEXT: mov r7, r1 +; CHECK-FP-NEXT: vmov r2, r3, d9 +; CHECK-FP-NEXT: mov r0, r4 +; CHECK-FP-NEXT: mov r1, r6 +; CHECK-FP-NEXT: bl __aeabi_dadd +; CHECK-FP-NEXT: vmov r2, r3, d8 +; CHECK-FP-NEXT: vmov d9, r0, r1 +; CHECK-FP-NEXT: mov r0, r5 +; CHECK-FP-NEXT: mov r1, r7 +; CHECK-FP-NEXT: bl __aeabi_dadd +; CHECK-FP-NEXT: vmov d8, r0, r1 +; CHECK-FP-NEXT: vmov r2, r3, d9 +; CHECK-FP-NEXT: vmov r0, r1, d8 +; CHECK-FP-NEXT: vpop {d8, d9} +; CHECK-FP-NEXT: add sp, #4 +; CHECK-FP-NEXT: pop {r4, r5, r6, r7, pc} +entry: + %sum = fadd <2 x double> %lhs, %rhs + ret <2 x double> %sum +} Index: llvm/test/CodeGen/Thumb2/mve-vcvt.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vcvt.ll +++ llvm/test/CodeGen/Thumb2/mve-vcvt.ll @@ -317,3 +317,111 @@ %out = fptoui <8 x half> %src to <8 x i16> ret <8 x i16> %out } + +define arm_aapcs_vfpcc <2 x double> @foo_float_int64(<2 x i64> %src) { +; CHECK-LABEL: foo_float_int64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: vmov r1, s19 +; CHECK-NEXT: bl __aeabi_l2d +; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: vmov r3, s17 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl __aeabi_l2d +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %out = sitofp <2 x i64> %src to <2 x double> + ret <2 x double> %out +} + +define arm_aapcs_vfpcc <2 x double> @foo_float_uint64(<2 x 
i64> %src) { +; CHECK-LABEL: foo_float_uint64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, s18 +; CHECK-NEXT: vmov r1, s19 +; CHECK-NEXT: bl __aeabi_ul2d +; CHECK-NEXT: vmov r2, s16 +; CHECK-NEXT: vmov r3, s17 +; CHECK-NEXT: vmov d9, r0, r1 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl __aeabi_ul2d +; CHECK-NEXT: vmov d8, r0, r1 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %out = uitofp <2 x i64> %src to <2 x double> + ret <2 x double> %out +} + +define arm_aapcs_vfpcc <2 x i64> @foo_int64_float(<2 x double> %src) { +; CHECK-LABEL: foo_int64_float: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: vmov r2, r3, d9 +; CHECK-NEXT: vmov.32 q4[0], r0 +; CHECK-NEXT: lsrl r0, r1, #32 +; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: mov r0, r2 +; CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl __aeabi_d2lz +; CHECK-NEXT: vmov.32 q4[2], r0 +; CHECK-NEXT: lsrl r0, r1, #32 +; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %out = fptosi <2 x double> %src to <2 x i64> + ret <2 x i64> %out +} + +define arm_aapcs_vfpcc <2 x i64> @foo_uint64_float(<2 x double> %src) { +; CHECK-LABEL: foo_uint64_float: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: .vsave {d8, d9} +; CHECK-NEXT: vpush {d8, d9} +; CHECK-NEXT: vmov q4, q0 +; CHECK-NEXT: vmov r0, r1, d8 +; CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: vmov r2, r3, d9 +; CHECK-NEXT: vmov.32 q4[0], r0 +; CHECK-NEXT: lsrl r0, r1, #32 +; CHECK-NEXT: vmov.32 q4[1], r0 +; CHECK-NEXT: mov r0, r2 +; 
CHECK-NEXT: mov r1, r3 +; CHECK-NEXT: bl __aeabi_d2ulz +; CHECK-NEXT: vmov.32 q4[2], r0 +; CHECK-NEXT: lsrl r0, r1, #32 +; CHECK-NEXT: vmov.32 q4[3], r0 +; CHECK-NEXT: vmov q0, q4 +; CHECK-NEXT: vpop {d8, d9} +; CHECK-NEXT: pop {r7, pc} +entry: + %out = fptoui <2 x double> %src to <2 x i64> + ret <2 x i64> %out +} Index: llvm/test/CodeGen/Thumb2/mve-vdup.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vdup.ll +++ llvm/test/CodeGen/Thumb2/mve-vdup.ll @@ -35,6 +35,20 @@ ret <16 x i8> %out } +define arm_aapcs_vfpcc <2 x i64> @vdup_i64(i64 %src) { +; CHECK-LABEL: vdup_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.32 q0[0], r0 +; CHECK-NEXT: vmov.32 q0[1], r1 +; CHECK-NEXT: vmov.32 q0[2], r0 +; CHECK-NEXT: vmov.32 q0[3], r1 +; CHECK-NEXT: bx lr +entry: + %0 = insertelement <2 x i64> undef, i64 %src, i32 0 + %out = shufflevector <2 x i64> %0, <2 x i64> undef, <2 x i32> zeroinitializer + ret <2 x i64> %out +} + define arm_aapcs_vfpcc <4 x float> @vdup_f32_1(float %src) { ; CHECK-LABEL: vdup_f32_1: ; CHECK: @ %bb.0: @ %entry @@ -80,6 +94,19 @@ ret <8 x half> %out } +define arm_aapcs_vfpcc <2 x double> @vdup_f64(double %src) { +; CHECK-LABEL: vdup_f64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: @ kill: def $d0 killed $d0 def $q0 +; CHECK-NEXT: vmov.f32 s2, s0 +; CHECK-NEXT: vmov.f32 s3, s1 +; CHECK-NEXT: bx lr +entry: + %0 = insertelement <2 x double> undef, double %src, i32 0 + %out = shufflevector <2 x double> %0, <2 x double> undef, <2 x i32> zeroinitializer + ret <2 x double> %out +} + define arm_aapcs_vfpcc <4 x i32> @vduplane_i32(<4 x i32> %src) { @@ -115,6 +142,17 @@ ret <16 x i8> %out } +define arm_aapcs_vfpcc <2 x i64> @vduplane_i64(<2 x i64> %src) { +; CHECK-LABEL: vduplane_i64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 s0, s2 +; CHECK-NEXT: vmov.f32 s1, s3 +; CHECK-NEXT: bx lr +entry: + %out = shufflevector <2 x i64> %src, <2 x i64> undef, <2 x i32> + ret <2 x i64> %out +} + define 
arm_aapcs_vfpcc <4 x float> @vduplane_f32(<4 x float> %src) { ; CHECK-LABEL: vduplane_f32: ; CHECK: @ %bb.0: @ %entry @@ -136,3 +174,14 @@ %out = shufflevector <8 x half> %src, <8 x half> undef, <8 x i32> ret <8 x half> %out } + +define arm_aapcs_vfpcc <2 x double> @vduplane_f64(<2 x double> %src) { +; CHECK-LABEL: vduplane_f64: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.f32 s0, s2 +; CHECK-NEXT: vmov.f32 s1, s3 +; CHECK-NEXT: bx lr +entry: + %out = shufflevector <2 x double> %src, <2 x double> undef, <2 x i32> + ret <2 x double> %out +} Index: llvm/test/CodeGen/Thumb2/mve-vmovimm.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vmovimm.ll +++ llvm/test/CodeGen/Thumb2/mve-vmovimm.ll @@ -11,7 +11,7 @@ ret <16 x i8> } -define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1(i8 *%dest) { +define arm_aapcs_vfpcc <16 x i8> @mov_int8_m1() { ; CHECK-LABEL: mov_int8_m1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i8 q0, #0xff @@ -20,7 +20,7 @@ ret <16 x i8> } -define arm_aapcs_vfpcc <8 x i16> @mov_int16_1(i16 *%dest) { +define arm_aapcs_vfpcc <8 x i16> @mov_int16_1() { ; CHECK-LABEL: mov_int16_1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i16 q0, #0x1 @@ -29,7 +29,7 @@ ret <8 x i16> } -define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1(i16 *%dest) { +define arm_aapcs_vfpcc <8 x i16> @mov_int16_m1() { ; CHECK-LABEL: mov_int16_m1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i8 q0, #0xff @@ -38,7 +38,7 @@ ret <8 x i16> } -define arm_aapcs_vfpcc <8 x i16> @mov_int16_256(i16 *%dest) { +define arm_aapcs_vfpcc <8 x i16> @mov_int16_256() { ; CHECK-LABEL: mov_int16_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i16 q0, #0x100 @@ -56,7 +56,7 @@ ret <8 x i16> } -define arm_aapcs_vfpcc <8 x i16> @mov_int16_258(i16 *%dest) { +define arm_aapcs_vfpcc <8 x i16> @mov_int16_258() { ; CHECK-LABEL: mov_int16_258: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adr r0, .LCPI6_0 @@ -73,7 +73,7 @@ ret <8 x i16> } -define arm_aapcs_vfpcc <4 x i32> 
@mov_int32_1(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_1() { ; CHECK-LABEL: mov_int32_1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q0, #0x1 @@ -82,7 +82,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_256(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_256() { ; CHECK-LABEL: mov_int32_256: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q0, #0x100 @@ -91,7 +91,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_65536() { ; CHECK-LABEL: mov_int32_65536: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q0, #0x10000 @@ -100,7 +100,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777216() { ; CHECK-LABEL: mov_int32_16777216: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q0, #0x1000000 @@ -109,7 +109,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_16777217() { ; CHECK-LABEL: mov_int32_16777217: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adr r0, .LCPI11_0 @@ -126,7 +126,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_17919() { ; CHECK-LABEL: mov_int32_17919: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q0, #0x45ff @@ -135,7 +135,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4587519() { ; CHECK-LABEL: mov_int32_4587519: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i32 q0, #0x45ffff @@ -144,7 +144,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m1() { ; CHECK-LABEL: mov_int32_m1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i8 q0, #0xff @@ -153,7 +153,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> 
@mov_int32_4294901760(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294901760() { ; CHECK-LABEL: mov_int32_4294901760: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmvn.i32 q0, #0xffff @@ -162,7 +162,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278190335() { ; CHECK-LABEL: mov_int32_4278190335: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adr r0, .LCPI16_0 @@ -179,7 +179,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278255615() { ; CHECK-LABEL: mov_int32_4278255615: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmvn.i32 q0, #0xff0000 @@ -188,8 +188,8 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x float> @mov_float_1(float *%dest) { -; CHECK-LABEL: mov_float_1: +define arm_aapcs_vfpcc <2 x i64> @mov_int64_1() { +; CHECK-LABEL: mov_int64_1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adr r0, .LCPI18_0 ; CHECK-NEXT: vldrw.u32 q0, [r0] @@ -197,6 +197,32 @@ ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: ; CHECK-NEXT: .LCPI18_0: +; CHECK-NEXT: .long 1 @ double 4.9406564584124654E-324 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 1 @ double 4.9406564584124654E-324 +; CHECK-NEXT: .long 0 +entry: + ret <2 x i64> +} + +define arm_aapcs_vfpcc <2 x i64> @mov_int64_m1() { +; CHECK-LABEL: mov_int64_m1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vmov.i8 q0, #0xff +; CHECK-NEXT: bx lr +entry: + ret <2 x i64> +} + +define arm_aapcs_vfpcc <4 x float> @mov_float_1() { +; CHECK-LABEL: mov_float_1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adr r0, .LCPI20_0 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI20_0: ; CHECK-NEXT: .long 1065353216 @ double 0.007812501848093234 ; CHECK-NEXT: .long 1065353216 ; CHECK-NEXT: .long 1065353216 @ double 0.007812501848093234 @@ -205,15 +231,15 @@ ret <4 x float> } -define arm_aapcs_vfpcc <4 x 
float> @mov_float_m3(float *%dest) { +define arm_aapcs_vfpcc <4 x float> @mov_float_m3() { ; CHECK-LABEL: mov_float_m3: ; CHECK: @ %bb.0: @ %entry -; CHECK-NEXT: adr r0, .LCPI19_0 +; CHECK-NEXT: adr r0, .LCPI21_0 ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: bx lr ; CHECK-NEXT: .p2align 4 ; CHECK-NEXT: @ %bb.1: -; CHECK-NEXT: .LCPI19_0: +; CHECK-NEXT: .LCPI21_0: ; CHECK-NEXT: .long 3225419776 @ double -32.000022917985916 ; CHECK-NEXT: .long 3225419776 ; CHECK-NEXT: .long 3225419776 @ double -32.000022917985916 @@ -222,7 +248,7 @@ ret <4 x float> } -define arm_aapcs_vfpcc <8 x half> @mov_float16_1(half *%dest) { +define arm_aapcs_vfpcc <8 x half> @mov_float16_1() { ; CHECK-LABEL: mov_float16_1: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i16 q0, #0x3c00 @@ -232,7 +258,7 @@ ret <8 x half> } -define arm_aapcs_vfpcc <8 x half> @mov_float16_m3(half *%dest) { +define arm_aapcs_vfpcc <8 x half> @mov_float16_m3() { ; CHECK-LABEL: mov_float16_m3: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmov.i16 q0, #0xc200 @@ -241,3 +267,20 @@ entry: ret <8 x half> } + +define arm_aapcs_vfpcc <2 x double> @mov_double_1() { +; CHECK-LABEL: mov_double_1: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: adr r0, .LCPI24_0 +; CHECK-NEXT: vldrw.u32 q0, [r0] +; CHECK-NEXT: bx lr +; CHECK-NEXT: .p2align 4 +; CHECK-NEXT: @ %bb.1: +; CHECK-NEXT: .LCPI24_0: +; CHECK-NEXT: .long 0 @ double 1 +; CHECK-NEXT: .long 1072693248 +; CHECK-NEXT: .long 0 @ double 1 +; CHECK-NEXT: .long 1072693248 +entry: + ret <2 x double> +} Index: llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll =================================================================== --- llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll +++ llvm/test/CodeGen/Thumb2/mve-vmvnimm.ll @@ -2,7 +2,7 @@ ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve -verify-machineinstrs %s -o - | FileCheck %s ; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp -verify-machineinstrs %s -o - | FileCheck %s -define arm_aapcs_vfpcc <8 x i16> @mov_int16_511(i16 *%dest) 
{ +define arm_aapcs_vfpcc <8 x i16> @mov_int16_511() { ; CHECK-LABEL: mov_int16_511: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmvn.i16 q0, #0xfe00 @@ -11,7 +11,7 @@ ret <8 x i16> } -define arm_aapcs_vfpcc <8 x i16> @mov_int16_65281(i16 *%dest) { +define arm_aapcs_vfpcc <8 x i16> @mov_int16_65281() { ; CHECK-LABEL: mov_int16_65281: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmvn.i16 q0, #0xfe @@ -20,7 +20,7 @@ ret <8 x i16> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_m7(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m7() { ; CHECK-LABEL: mov_int32_m7: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmvn.i32 q0, #0x6 @@ -29,7 +29,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_m769(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m769() { ; CHECK-LABEL: mov_int32_m769: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmvn.i32 q0, #0x300 @@ -38,7 +38,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_m262145(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m262145() { ; CHECK-LABEL: mov_int32_m262145: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmvn.i32 q0, #0x40000 @@ -47,7 +47,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_m134217729(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_m134217729() { ; CHECK-LABEL: mov_int32_m134217729: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmvn.i32 q0, #0x8000000 @@ -56,7 +56,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294902528(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4294902528() { ; CHECK-LABEL: mov_int32_4294902528: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: vmvn.i32 q0, #0xfcff @@ -65,7 +65,7 @@ ret <4 x i32> } -define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278386688(i32 *%dest) { +define arm_aapcs_vfpcc <4 x i32> @mov_int32_4278386688() { ; CHECK-LABEL: mov_int32_4278386688: ; CHECK: @ %bb.0: @ %entry ; CHECK-NEXT: adr r0, .LCPI7_0