# Patch summary (leading text before the first "Index:" header is not part of
# any hunk):
#   * lib/Target/ARM/ARMInstrVFP.td: adds selection patterns so that
#     (fsub (fneg dst), (fmul a, b)) selects VNMLAD / VNMLAS, under the same
#     predicates as the existing (fsub (fneg (fmul a, b)), dst) patterns, and
#     adds a comment to the existing pattern.
#   * test/CodeGen/ARM/fnmscs.ll: switches the VFP2 and NEON RUN lines to the
#     arm-eabihf triple, adds a VFP3 RUN line with VFP3 checks for t1-t4, and
#     adds tests t5 (double) and t6 (float) for the new operand order.
# NOTE(review): line breaks and indentation were restored from a
# whitespace-collapsed copy of this patch; the indentation of context lines is
# a best guess -- verify against the tree (or apply ignoring whitespace).
Index: lib/Target/ARM/ARMInstrVFP.td
===================================================================
--- lib/Target/ARM/ARMInstrVFP.td
+++ lib/Target/ARM/ARMInstrVFP.td
@@ -1862,6 +1862,7 @@
                 RegConstraint<"$Sdin = $Sd">,
                 Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>;
 
+// Select VNMLA: (-(a * b) - dst) -> -(dst + (a * b))
 def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin),
           (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
         Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
@@ -1869,6 +1870,14 @@
           (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
         Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
 
+// Select VNMLA: (-dst - (a * b)) -> -(dst + (a * b))
+def : Pat<(fsub_mlx (fneg DPR:$dstin), (fmul_su DPR:$a, (f64 DPR:$b))),
+          (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>,
+        Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>;
+def : Pat<(fsub_mlx (fneg SPR:$dstin), (fmul_su SPR:$a, SPR:$b)),
+          (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>,
+        Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>;
+
 def VNMLSD : ADbI<0b11100, 0b01, 0, 0,
                   (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm),
                   IIC_fpMAC64, "vnmls", ".f64\t$Dd, $Dn, $Dm",
Index: test/CodeGen/ARM/fnmscs.ll
===================================================================
--- test/CodeGen/ARM/fnmscs.ll
+++ test/CodeGen/ARM/fnmscs.ll
@@ -1,7 +1,10 @@
-; RUN: llc -mtriple=arm-eabi -mattr=+vfp2 %s -o - \
+; RUN: llc -mtriple=arm-eabihf -mattr=+vfp2 %s -o - \
 ; RUN: | FileCheck %s -check-prefix=VFP2
 
-; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - \
+; RUN: llc -mtriple=arm-eabihf -mattr=+vfp3 %s -o - \
+; RUN: | FileCheck %s -check-prefix=VFP3
+
+; RUN: llc -mtriple=arm-eabihf -mattr=+neon %s -o - \
 ; RUN: | FileCheck %s -check-prefix=NEON
 
 ; RUN: llc -mtriple=arm-eabi -mcpu=cortex-a8 %s -o - \
@@ -21,6 +24,9 @@
 ; VFP2-LABEL: t1:
 ; VFP2: vnmla.f32
 
+; VFP3-LABEL: t1:
+; VFP3: vnmla.f32
+
 ; NEON-LABEL: t1:
 ; NEON: vnmla.f32
 
@@ -42,6 +48,9 @@
 ; VFP2-LABEL: t2:
 ; VFP2: vnmla.f32
 
+; VFP3-LABEL: t2:
+; VFP3: vnmla.f32
+
 ; NEON-LABEL: t2:
 ; NEON: vnmla.f32
 
@@ -63,6 +72,9 @@
 ; VFP2-LABEL: t3:
 ; VFP2: vnmla.f64
 
+; VFP3-LABEL: t3:
+; VFP3: vnmla.f64
+
 ; NEON-LABEL: t3:
 ; NEON: vnmla.f64
 
@@ -84,6 +96,9 @@
 ; VFP2-LABEL: t4:
 ; VFP2: vnmla.f64
 
+; VFP3-LABEL: t4:
+; VFP3: vnmla.f64
+
 ; NEON-LABEL: t4:
 ; NEON: vnmla.f64
 
@@ -99,3 +114,53 @@
   %2 = fsub double %1, %acc
   ret double %2
 }
+
+define double @t5(double %acc, double %a, double %b) nounwind {
+entry:
+; VFP2-LABEL: t5:
+; VFP2: vnmla.f64
+
+; VFP3-LABEL: t5:
+; VFP3: vnmla.f64
+
+; NEON-LABEL: t5:
+; NEON: vnmla.f64
+
+; A8U-LABEL: t5:
+; A8U: vmul.f64 d
+; A8U: vsub.f64 d
+
+; A8-LABEL: t5:
+; A8: vmul.f64 d
+; A8: vsub.f64 d
+
+  %0 = fsub double -0.0, %acc
+  %1 = fmul double %a, %b
+  %2 = fsub double %0, %1
+  ret double %2
+}
+
+define float @t6(float %acc, float %a, float %b) nounwind {
+entry:
+; VFP2-LABEL: t6:
+; VFP2: vnmla.f32
+
+; VFP3-LABEL: t6:
+; VFP3: vnmla.f32
+
+; NEON-LABEL: t6:
+; NEON: vnmla.f32
+
+; A8U-LABEL: t6:
+; A8U: vmul.f32 d
+; A8U: vsub.f32 d
+
+; A8-LABEL: t6:
+; A8: vmul.f32 s
+; A8: vsub.f32 s
+
+  %0 = fsub float -0.0, %acc
+  %1 = fmul float %a, %b
+  %2 = fsub float %0, %1
+  ret float %2
+}