Index: llvm/lib/Target/ARM/ARMInstrMVE.td =================================================================== --- llvm/lib/Target/ARM/ARMInstrMVE.td +++ llvm/lib/Target/ARM/ARMInstrMVE.td @@ -2592,6 +2592,13 @@ (v4f32 (MVE_VFMSf32 $src1, $src2, $src3))>; } +let Predicates = [HasMVEFloat] in { + def : Pat<(v8f16 (fma (v8f16 MQPR:$src1), (v8f16 MQPR:$src2), (v8f16 MQPR:$src3))), + (v8f16 (MVE_VFMAf16 $src3, $src1, $src2))>; + def : Pat<(v4f32 (fma (v4f32 MQPR:$src1), (v4f32 MQPR:$src2), (v4f32 MQPR:$src3))), + (v4f32 (MVE_VFMAf32 $src3, $src1, $src2))>; +} + def MVE_VADDf32 : MVE_VADDSUBFMA_fp<"vadd", "f32", 0b0, 0b0, 0b1, 0b0>; def MVE_VADDf16 : MVE_VADDSUBFMA_fp<"vadd", "f16", 0b1, 0b0, 0b1, 0b0>; Index: llvm/test/CodeGen/Thumb2/mve-vfma.ll =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/mve-vfma.ll @@ -0,0 +1,24 @@ +; RUN: llc -mtriple=thumbv8.1m.main-arm-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s + +define arm_aapcs_vfpcc <4 x float> @fma_v4f32(<4 x float> %dst, <4 x float> %s1, <4 x float> %s2) { +; CHECK-LABEL: fma_v4f32: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vfma.f32 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = tail call fast <4 x float> @llvm.fma.v4f32(<4 x float> %s1, <4 x float> %s2, <4 x float> %dst) + ret <4 x float> %0 +} + +define arm_aapcs_vfpcc <8 x half> @fma_v8f16(<8 x half> %dst, <8 x half> %s1, <8 x half> %s2) { +; CHECK-LABEL: fma_v8f16: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: vfma.f16 q0, q1, q2 +; CHECK-NEXT: bx lr +entry: + %0 = tail call fast <8 x half> @llvm.fma.v8f16(<8 x half> %s1, <8 x half> %s2, <8 x half> %dst) + ret <8 x half> %0 +} + +declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>) +declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)