Index: llvm/lib/Target/AArch64/AArch64.td =================================================================== --- llvm/lib/Target/AArch64/AArch64.td +++ llvm/lib/Target/AArch64/AArch64.td @@ -105,6 +105,9 @@ def FeatureUseRSqrt : SubtargetFeature< "use-reverse-square-root", "UseRSqrt", "true", "Use reverse square root">; +def FeatureHasFastFMA : SubtargetFeature< + "has-fast-fma", "HasFastFMA", "true", "Use FMA aggressively">; + //===----------------------------------------------------------------------===// // Architectures. // @@ -222,6 +225,7 @@ FeatureCrypto, FeatureCustomCheapAsMoveHandling, FeatureFPARMv8, + FeatureHasFastFMA, FeatureNEON, FeaturePerfMon, FeaturePostRAScheduler, Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -333,6 +333,9 @@ /// returns true, otherwise fmuladd is expanded to fmul + fadd. bool isFMAFasterThanFMulAndFAdd(EVT VT) const override; + /// Return true if FMA operations should be used aggressively. + bool enableAggressiveFMAFusion(EVT VT) const override; + const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override; /// \brief Returns false if N is a bit extraction pattern of (X >> C) & Mask. 
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp =================================================================== --- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -7374,6 +7374,10 @@ return false; } +bool AArch64TargetLowering::enableAggressiveFMAFusion(EVT VT) const { + return Subtarget->hasFastFMA() && isFMAFasterThanFMulAndFAdd(VT); +} + const MCPhysReg * AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const { // LR is a callee-save register, but we must treat it as clobbered by any call Index: llvm/lib/Target/AArch64/AArch64Subtarget.h =================================================================== --- llvm/lib/Target/AArch64/AArch64Subtarget.h +++ llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -83,6 +83,7 @@ bool HasMacroOpFusion = false; bool DisableLatencySchedHeuristic = false; bool UseRSqrt = false; + bool HasFastFMA = false; uint8_t MaxInterleaveFactor = 2; uint8_t VectorInsertExtractBaseCost = 3; uint16_t CacheLineSize = 0; @@ -190,6 +191,7 @@ } bool hasMacroOpFusion() const { return HasMacroOpFusion; } bool useRSqrt() const { return UseRSqrt; } + bool hasFastFMA() const { return HasFastFMA; } unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; } unsigned getVectorInsertExtractBaseCost() const { return VectorInsertExtractBaseCost;