Index: lib/Target/ARM/ARMInstrInfo.td
===================================================================
--- lib/Target/ARM/ARMInstrInfo.td
+++ lib/Target/ARM/ARMInstrInfo.td
@@ -366,12 +366,14 @@
 def UseMulOps : Predicate<"Subtarget->useMulOps()">;
 
 // Prefer fused MAC for fp mul + add over fp VMLA / VMLS if they are available.
-// But only select them if more precision in FP computation is allowed.
+// But only select them if more precision in FP computation is allowed, and when
+// they are not slower than a mul + add sequence.
 // Do not use them for Darwin platforms.
 def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion =="
                             " FPOpFusion::Fast && "
                             " Subtarget->hasVFP4()) && "
-                            "!Subtarget->isTargetDarwin()">;
+                            "!Subtarget->isTargetDarwin() &&"
+                            "Subtarget->useFPVMLx()">;
 
 def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">;
 def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">;
Index: test/CodeGen/ARM/fusedMAC.ll
===================================================================
--- test/CodeGen/ARM/fusedMAC.ll
+++ test/CodeGen/ARM/fusedMAC.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast | FileCheck %s
+; RUN: llc < %s -mtriple=armv7-eabi -mattr=+neon,+vfp4 -fp-contract=fast -mcpu=cortex-m33 | FileCheck %s -check-prefix=SLOW-MAC
 ; Check generated fused MAC and MLS.
 
 define double @fusedMACTest1(double %d1, double %d2, double %d3) {
@@ -12,6 +13,11 @@
 define float @fusedMACTest2(float %f1, float %f2, float %f3) {
 ;CHECK-LABEL: fusedMACTest2:
 ;CHECK: vfma.f32
+
+;SLOW-MAC-LABEL: fusedMACTest2:
+;SLOW-MAC: vmul.f32
+;SLOW-MAC-NEXT: vadd.f32
+
   %1 = fmul float %f1, %f2
   %2 = fadd float %1, %f3
   ret float %2