Index: llvm/include/llvm/CodeGen/TargetLowering.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetLowering.h
+++ llvm/include/llvm/CodeGen/TargetLowering.h
@@ -2538,7 +2538,13 @@
   /// not legal, but should return true if those types will eventually legalize
   /// to types that support FMAs. After legalization, it will only be called on
   /// types that support FMAs (via Legal or Custom actions)
-  virtual bool isFMAFasterThanFMulAndFAdd(EVT) const {
+  virtual bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                          EVT) const {
+    return false;
+  }
+
+  /// IR version
+  virtual bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *) const {
     return false;
   }
 
@@ -3768,7 +3774,7 @@
   /// Should SelectionDAG lower an atomic store of the given kind as a normal
   /// StoreSDNode (as opposed to an AtomicSDNode)? NOTE: The intention is to
   /// eventually migrate all targets to the using StoreSDNodes, but porting is
-  /// being done target at a time. 
+  /// being done target at a time.
   virtual bool lowerAtomicStoreAsStoreSDNode(const StoreInst &SI) const {
     assert(SI.isAtomic() && "violated precondition");
     return false;
Index: llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
===================================================================
--- llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
+++ llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp
@@ -1412,7 +1412,8 @@
     Register Op1 = getOrCreateVReg(*CI.getArgOperand(1));
     Register Op2 = getOrCreateVReg(*CI.getArgOperand(2));
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
-        TLI.isFMAFasterThanFMulAndFAdd(TLI.getValueType(*DL, CI.getType()))) {
+        TLI.isFMAFasterThanFMulAndFAdd(*MF,
+                                       TLI.getValueType(*DL, CI.getType()))) {
       // TODO: Revisit this to see if we should move this part of the
       // lowering to the combiner.
       MIRBuilder.buildInstr(TargetOpcode::G_FMA, {Dst}, {Op0, Op1, Op2},
Index: llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -11334,7 +11334,7 @@
 
   // Floating-point multiply-add without intermediate rounding.
   bool HasFMA =
-      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+      TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
 
   // No valid opcode, do not combine.
@@ -11551,7 +11551,7 @@
 
   // Floating-point multiply-add without intermediate rounding.
   bool HasFMA =
-      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+      TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
 
   // No valid opcode, do not combine.
@@ -11857,7 +11857,7 @@
   // Floating-point multiply-add without intermediate rounding.
   bool HasFMA =
       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath) &&
-      TLI.isFMAFasterThanFMulAndFAdd(VT) &&
+      TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT) &&
       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
 
   // Floating-point multiply-add with intermediate rounding. This can result
Index: llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -6143,7 +6143,7 @@
   case Intrinsic::fmuladd: {
     EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
     if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict &&
-        TLI.isFMAFasterThanFMulAndFAdd(VT)) {
+        TLI.isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
       setValue(&I, DAG.getNode(ISD::FMA, sdl,
                                getValue(I.getArgOperand(0)).getValueType(),
                                getValue(I.getArgOperand(0)),
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.h
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -396,7 +396,9 @@
   /// Return true if an FMA operation is faster than a pair of fmul and fadd
   /// instructions. fmuladd intrinsics will be expanded to FMAs when this method
   /// returns true, otherwise fmuladd is expanded to fmul + fadd.
-  bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                  EVT VT) const override;
+  bool isFMAFasterThanFMulAndFAdd(const Function &F, Type *Ty) const override;
 
   const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
Index: llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
===================================================================
--- llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8524,11 +8524,12 @@
     return true;
 
   const TargetOptions &Options = getTargetMachine().Options;
-  const DataLayout &DL = I->getModule()->getDataLayout();
-  EVT VT = getValueType(DL, User->getOperand(0)->getType());
+  const Function *F = I->getFunction();
+  const DataLayout &DL = F->getParent()->getDataLayout();
+  Type *Ty = User->getOperand(0)->getType();
 
-  return !(isFMAFasterThanFMulAndFAdd(VT) &&
-           isOperationLegalOrCustom(ISD::FMA, VT) &&
+  return !(isFMAFasterThanFMulAndFAdd(*F, Ty) &&
+           isOperationLegalOrCustom(ISD::FMA, getValueType(DL, Ty)) &&
           (Options.AllowFPOpFusion == FPOpFusion::Fast ||
            Options.UnsafeFPMath));
 }
@@ -9185,7 +9186,8 @@
   return -1;
 }
 
-bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(
+    const MachineFunction &MF, EVT VT) const {
   VT = VT.getScalarType();
 
   if (!VT.isSimple())
@@ -9202,6 +9204,17 @@
   return false;
 }
 
+bool AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
+                                                       Type *Ty) const {
+  switch (Ty->getScalarType()->getTypeID()) {
+  case Type::FloatTyID:
+  case Type::DoubleTyID:
+    return true;
+  default:
+    return false;
+  }
+}
+
 const MCPhysReg *
 AArch64TargetLowering::getScratchRegisters(CallingConv::ID) const {
   // LR is a callee-save register, but we must treat it as clobbered by any call
Index: llvm/lib/Target/AMDGPU/SIISelLowering.h
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.h
+++ llvm/lib/Target/AMDGPU/SIISelLowering.h
@@ -349,7 +349,8 @@
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                          EVT VT) const override;
   MVT getScalarShiftAmountTy(const DataLayout &, EVT) const override;
-  bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                  EVT VT) const override;
   bool isFMADLegalForFAddFSub(const SelectionDAG &DAG,
                              const SDNode *N) const override;
Index: llvm/lib/Target/AMDGPU/SIISelLowering.cpp
===================================================================
--- llvm/lib/Target/AMDGPU/SIISelLowering.cpp
+++ llvm/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -3913,7 +3913,8 @@
 // however does not support denormals, so we do report fma as faster if we have
 // a fast fma device and require denormals.
 //
-bool SITargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool SITargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                                  EVT VT) const {
   VT = VT.getScalarType();
 
   switch (VT.getSimpleVT().SimpleTy) {
@@ -9433,7 +9434,7 @@
   if ((Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath ||
        (N0->getFlags().hasAllowContract() &&
         N1->getFlags().hasAllowContract())) &&
-      isFMAFasterThanFMulAndFAdd(VT)) {
+      isFMAFasterThanFMulAndFAdd(DAG.getMachineFunction(), VT)) {
     return ISD::FMA;
   }
Index: llvm/lib/Target/ARM/ARMISelLowering.h
===================================================================
--- llvm/lib/Target/ARM/ARMISelLowering.h
+++ llvm/lib/Target/ARM/ARMISelLowering.h
@@ -747,7 +747,10 @@
     /// lower a pair of fmul and fadd to the latter so it's not clear that there
     /// would be a gain or that the gain would be worthwhile enough to risk
     /// correctness bugs.
-    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override { return false; }
+    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                    EVT VT) const override {
+      return false;
+    }
 
     SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const;
Index: llvm/lib/Target/Hexagon/HexagonISelLowering.h
===================================================================
--- llvm/lib/Target/Hexagon/HexagonISelLowering.h
+++ llvm/lib/Target/Hexagon/HexagonISelLowering.h
@@ -137,7 +137,8 @@
   /// instructions. fmuladd intrinsics will be expanded to FMAs when this
   /// method returns true (and FMAs are legal), otherwise fmuladd is
   /// expanded to mul + add.
-  bool isFMAFasterThanFMulAndFAdd(EVT) const override;
+  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &,
+                                  EVT) const override;
 
   // Should we expand the build vector with shuffles?
   bool shouldExpandBuildVectorWithShuffles(EVT VT,
Index: llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
===================================================================
--- llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
+++ llvm/lib/Target/Hexagon/HexagonISelLowering.cpp
@@ -1847,7 +1847,8 @@
   return VT1.getSimpleVT() == MVT::i64 && VT2.getSimpleVT() == MVT::i32;
 }
 
-bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool HexagonTargetLowering::isFMAFasterThanFMulAndFAdd(
+    const MachineFunction &MF, EVT VT) const {
   return isOperationLegalOrCustom(ISD::FMA, VT);
 }
 
Index: llvm/lib/Target/NVPTX/NVPTXISelLowering.h
===================================================================
--- llvm/lib/Target/NVPTX/NVPTXISelLowering.h
+++ llvm/lib/Target/NVPTX/NVPTXISelLowering.h
@@ -538,7 +538,10 @@
   bool allowFMA(MachineFunction &MF, CodeGenOpt::Level OptLevel) const;
   bool allowUnsafeFPMath(MachineFunction &MF) const;
 
-  bool isFMAFasterThanFMulAndFAdd(EVT) const override { return true; }
+  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                  EVT) const override {
+    return true;
+  }
 
   bool enableAggressiveFMAFusion(EVT VT) const override { return true; }
 
Index: llvm/lib/Target/PowerPC/PPCISelLowering.h
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.h
+++ llvm/lib/Target/PowerPC/PPCISelLowering.h
@@ -900,7 +900,8 @@
     /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be
     /// expanded to FMAs when this method returns true, otherwise fmuladd is
     /// expanded to fmul + fadd.
-    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                    EVT VT) const override;
 
     const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const override;
 
Index: llvm/lib/Target/PowerPC/PPCISelLowering.cpp
===================================================================
--- llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -14889,7 +14889,8 @@
   return true;
 }
 
-bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool PPCTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                                   EVT VT) const {
   VT = VT.getScalarType();
 
   if (!VT.isSimple())
Index: llvm/lib/Target/SystemZ/SystemZISelLowering.h
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelLowering.h
+++ llvm/lib/Target/SystemZ/SystemZISelLowering.h
@@ -404,7 +404,8 @@
   bool isCheapToSpeculateCtlz() const override { return true; }
   EVT getSetCCResultType(const DataLayout &DL, LLVMContext &,
                          EVT) const override;
-  bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+  bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                  EVT VT) const override;
   bool isFPImmLegal(const APFloat &Imm, EVT VT,
                     bool ForCodeSize) const override;
   bool isLegalICmpImmediate(int64_t Imm) const override;
Index: llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
===================================================================
--- llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -643,7 +643,8 @@
   return VT.changeVectorElementTypeToInteger();
 }
 
-bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool SystemZTargetLowering::isFMAFasterThanFMulAndFAdd(
+    const MachineFunction &MF, EVT VT) const {
   VT = VT.getScalarType();
 
   if (!VT.isSimple())
Index: llvm/lib/Target/X86/X86ISelLowering.h
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.h
+++ llvm/lib/Target/X86/X86ISelLowering.h
@@ -1056,7 +1056,8 @@
     /// Return true if an FMA operation is faster than a pair of fmul and fadd
     /// instructions. fmuladd intrinsics will be expanded to FMAs when this
    /// method returns true, otherwise fmuladd is expanded to fmul + fadd.
-    bool isFMAFasterThanFMulAndFAdd(EVT VT) const override;
+    bool isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                    EVT VT) const override;
 
     /// Return true if it's profitable to narrow
     /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
Index: llvm/lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- llvm/lib/Target/X86/X86ISelLowering.cpp
+++ llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -29099,8 +29099,8 @@
   return true;
 }
 
-bool
-X86TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+bool X86TargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
+                                                   EVT VT) const {
   if (!Subtarget.hasAnyFMA())
     return false;
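
Illustrative sketch (not part of the diff above): how a hypothetical out-of-tree target might
implement the two new overloads. The class name MyTargetLowering and the "use-soft-float"
attribute check are assumptions made for this example only; DAG/GlobalISel call sites pass the
MachineFunction (as in the DAGCombiner and IRTranslator hunks), while IR-level callers that have
no MachineFunction yet would use the Function/Type overload.

// MachineFunction-based overload: called once code generation has started.
// Per-function state is reachable through MF.getFunction().
bool MyTargetLowering::isFMAFasterThanFMulAndFAdd(const MachineFunction &MF,
                                                  EVT VT) const {
  // Example of a per-function decision: refuse FMA formation for soft-float
  // functions ("use-soft-float" is used only to illustrate the new parameter).
  if (MF.getFunction().getFnAttribute("use-soft-float").getValueAsString() ==
      "true")
    return false;

  VT = VT.getScalarType();
  if (!VT.isSimple())
    return false;

  switch (VT.getSimpleVT().SimpleTy) {
  case MVT::f32:
  case MVT::f64:
    return true;
  default:
    return false;
  }
}

// IR-based overload: usable before any MachineFunction exists; it mirrors the
// EVT-based decision on IR types.
bool MyTargetLowering::isFMAFasterThanFMulAndFAdd(const Function &F,
                                                  Type *Ty) const {
  Type *ScalarTy = Ty->getScalarType();
  return ScalarTy->isFloatTy() || ScalarTy->isDoubleTy();
}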