diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3861,7 +3861,7 @@ return false; } -// FP Opcodes that can be combined with a FMUL +// FP Opcodes that can be combined with a FMUL. static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { switch (Inst.getOpcode()) { default: @@ -3882,9 +3882,7 @@ case AArch64::FSUBv2f32: case AArch64::FSUBv2f64: case AArch64::FSUBv4f32: - TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; - return (Options.UnsafeFPMath || - Options.AllowFPOpFusion == FPOpFusion::Fast); + return true; } return false; } @@ -3899,7 +3897,8 @@ // \param CombineOpc instruction in the basic block \param MBB static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg = 0, - bool CheckZeroReg = false) { + bool CheckZeroReg = false, + bool CanFuseWithContractFlag = false) { MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo(); MachineInstr *MI = nullptr; @@ -3912,6 +3911,11 @@ if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg())) return false; + // When fusion is only permitted via the contract fast-math flag, MI must + // carry it as well; bail out if it does not. + if (CanFuseWithContractFlag && !MI->getFlag(MachineInstr::FmContract)) + return false; + if (CheckZeroReg) { assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() && MI->getOperand(1).isReg() && MI->getOperand(2).isReg() && @@ -3934,8 +3938,9 @@ // // Is \param MO defined by a floating-point multiply and can be combined? 
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, - unsigned MulOpc) { - return canCombine(MBB, MO, MulOpc); + unsigned MulOpc, bool CanFuseWithContractFlag) { + return canCombine(MBB, MO, MulOpc, /*ZeroReg=*/0, /*CheckZeroReg=*/false, + CanFuseWithContractFlag); } // TODO: There are many more machine instruction opcodes to match: @@ -4114,12 +4119,23 @@ if (!isCombineInstrCandidateFP(Root)) return false; + // Fusing the FMUL into Root is legal if the target options allow it globally + // or if both instructions carry the contract fast-math flag. + const TargetOptions &Options = Root.getMF()->getTarget().Options; + bool CanFuseUnconditionally = + Options.UnsafeFPMath || Options.AllowFPOpFusion == FPOpFusion::Fast; + bool CanFuseWithContractFlag = + !CanFuseUnconditionally && Root.getFlag(MachineInstr::FmContract); + if (!CanFuseUnconditionally && !CanFuseWithContractFlag) + return false; + MachineBasicBlock &MBB = *Root.getParent(); bool Found = false; auto Match = [&](int Opcode, int Operand, MachineCombinerPattern Pattern) -> bool { - if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) { + if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode, + CanFuseWithContractFlag)) { Patterns.push_back(Pattern); return true; } diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir --- a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir @@ -6,8 +6,7 @@ # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: [[MUL:%.*]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FMULSrr [[B]], [[A]] -# CHECK-NEXT: fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr killed [[MUL]], [[C]] +# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] --- name: scalar_fmadd_fast alignment: 4 @@ -46,8 +45,7 @@ # CHECK: 
[[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: [[MUL:%.*]]:fpr32 = contract FMULSrr [[B]], [[A]] -# CHECK-NEXT: fpr32 = contract FADDSrr killed [[MUL]], [[C]] +# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] --- name: scalar_fmadd_contract @@ -209,8 +207,7 @@ # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: [[MUL:%.*]]:fpr128 = nnan ninf nsz arcp contract afn reassoc FMULv2f64 [[B]], [[A]] -# CHECK-NEXT: fpr128 = nnan ninf nsz arcp contract afn reassoc FADDv2f64 killed [[MUL]], [[C]] +# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_fast alignment: 4 @@ -249,8 +246,7 @@ # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: [[MUL:%.*]]:fpr128 = contract FMULv2f64 [[B]], [[A]] -# CHECK-NEXT: fpr128 = contract FADDv2f64 killed [[MUL]], [[C]] +# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_contract alignment: 4