Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===================================================================
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2060,9 +2060,23 @@
 def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>;
 def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>;
 
+let Predicates = [HasMVEFloat] in {
+  def : Pat<(v4f32 (fadd (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+            (v4f32 (MVE_VADDf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+  def : Pat<(v8f16 (fadd (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+            (v8f16 (MVE_VADDf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+
 def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>;
 def MVE_VSUBf16 : MVE_VADDSUBFMA_fp<"vsub", "f16", 0b1, 0b0, 0b1, 0b1>;
 
+let Predicates = [HasMVEFloat] in {
+  def : Pat<(v4f32 (fsub (v4f32 MQPR:$val1), (v4f32 MQPR:$val2))),
+            (v4f32 (MVE_VSUBf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
+  def : Pat<(v8f16 (fsub (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
+            (v8f16 (MVE_VSUBf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+}
+
 class MVE_VCADD<string suffix, bit size, list<dag> pattern=[]>
   : MVEFloatArithNeon<"vcadd", suffix, size, (outs MQPR:$Qd),
                       (ins MQPR:$Qn, MQPR:$Qm, complexrotateopodd:$rot),
Index: llvm/test/CodeGen/Thumb2/mve-simple-arith.ll
===================================================================
--- llvm/test/CodeGen/Thumb2/mve-simple-arith.ll
+++ llvm/test/CodeGen/Thumb2/mve-simple-arith.ll
@@ -31,6 +31,26 @@
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <8 x half> @add_float16_t(<8 x half> %src1, <8 x half> %src2) {
+; CHECK-LABEL: add_float16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vadd.f16 q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = fadd nnan ninf nsz <8 x half> %src2, %src1
+  ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @add_float32_t(<4 x float> %src1, <4 x float> %src2) {
+; CHECK-LABEL: add_float32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vadd.f32 q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = fadd nnan ninf nsz <4 x float> %src2, %src1
+  ret <4 x float> %0
+}
+
 define arm_aapcs_vfpcc <16 x i8> @sub_int8_t(<16 x i8> %src1, <16 x i8> %src2) {
 ; CHECK-LABEL: sub_int8_t:
 ; CHECK:       @ %bb.0: @ %entry
@@ -62,3 +82,23 @@
   ret <4 x i32> %0
 }
 
+define arm_aapcs_vfpcc <8 x half> @sub_float16_t(<8 x half> %src1, <8 x half> %src2) {
+; CHECK-LABEL: sub_float16_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vsub.f16 q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = fsub nnan ninf nsz <8 x half> %src2, %src1
+  ret <8 x half> %0
+}
+
+define arm_aapcs_vfpcc <4 x float> @sub_float32_t(<4 x float> %src1, <4 x float> %src2) {
+; CHECK-LABEL: sub_float32_t:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vsub.f32 q0, q1, q0
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = fsub nnan ninf nsz <4 x float> %src2, %src1
+  ret <4 x float> %0
+}
+