Index: llvm/lib/Target/ARM/ARMInstrMVE.td
===================================================================
--- llvm/lib/Target/ARM/ARMInstrMVE.td
+++ llvm/lib/Target/ARM/ARMInstrMVE.td
@@ -2601,6 +2601,12 @@
             (v4f32 (MVE_VADDf32 (v4f32 MQPR:$val1), (v4f32 MQPR:$val2)))>;
   def : Pat<(v8f16 (fadd (v8f16 MQPR:$val1), (v8f16 MQPR:$val2))),
             (v8f16 (MVE_VADDf16 (v8f16 MQPR:$val1), (v8f16 MQPR:$val2)))>;
+  // fma(src1, src2, src3) is src1 * src2 + src3; the addend (src3) is passed
+  // as the first operand of VFMA, followed by the two multiplicands.
+  def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))),
+            (v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>;
+  def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))),
+            (v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>;
 }
 
 def MVE_VSUBf32 : MVE_VADDSUBFMA_fp<"vsub", "f32", 0b0, 0b0, 0b1, 0b1>;
Index: llvm/test/CodeGen/ARM/vfma.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/vfma.ll
@@ -0,0 +1,27 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
+
+define arm_aapcs_vfpcc <4 x float> @fma_v4f32(<4 x float> %dst, <4 x float> %s1, <4 x float> %s2) {
+; CHECK-LABEL: fma_v4f32:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vfma.f32 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %s1, <4 x float> %s2, <4 x float> %dst)
+  ret <4 x float> %0
+}
+
+define arm_aapcs_vfpcc <8 x half> @fma_v8f16(<8 x half> %dst, <8 x half> %s1, <8 x half> %s2) {
+; CHECK-LABEL: fma_v8f16:
+; CHECK:       @ %bb.0: @ %entry
+; CHECK-NEXT:    vfma.f16 q0, q1, q2
+; CHECK-NEXT:    bx lr
+entry:
+  %0 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %s1, <8 x half> %s2, <8 x half> %dst)
+  ret <8 x half> %0
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) #1
+; Function Attrs: nounwind readnone speculatable
+declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>) #1