diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -3861,7 +3861,7 @@ return false; } -// FP Opcodes that can be combined with a FMUL +// FP Opcodes that can be combined with a FMUL. static bool isCombineInstrCandidateFP(const MachineInstr &Inst) { switch (Inst.getOpcode()) { default: @@ -3883,8 +3883,11 @@ case AArch64::FSUBv2f64: case AArch64::FSUBv4f32: TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options; - return (Options.UnsafeFPMath || - Options.AllowFPOpFusion == FPOpFusion::Fast); + // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by + // the target options or if FADD/FSUB has the contract fast-math flag. + return Options.UnsafeFPMath || + Options.AllowFPOpFusion == FPOpFusion::Fast || + Inst.getFlag(MachineInstr::FmContract); } return false; } diff --git a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir --- a/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir +++ b/llvm/test/CodeGen/AArch64/machine-combiner-instr-fmf.mir @@ -6,8 +6,7 @@ # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: [[MUL:%.*]]:fpr32 = nnan ninf nsz arcp contract afn reassoc FMULSrr [[B]], [[A]] -# CHECK-NEXT: fpr32 = nnan ninf nsz arcp contract afn reassoc FADDSrr killed [[MUL]], [[C]] +# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] --- name: scalar_fmadd_fast alignment: 4 @@ -46,8 +45,7 @@ # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: [[MUL:%.*]]:fpr32 = contract FMULSrr [[B]], [[A]] -# CHECK-NEXT: fpr32 = contract FADDSrr killed [[MUL]], [[C]] +# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] --- name: 
scalar_fmadd_contract @@ -81,7 +79,7 @@ ... -# Do not create FMADD, because we don't have the contract flag on moth instructions. +# Do not create FMADD, because we don't have the contract flag on the FADD. # CHECK-LABEL: name: scalar_fmadd_contract_op0 # CHECK: [[C:%.*]]:fpr32 = COPY $s2 @@ -121,14 +119,13 @@ ... -# Do not create FMADD, because we don't have the contract flag on moth instructions. +# Do create FMADD, because we have the contract flag on the FADD. # # CHECK-LABEL: name: scalar_fmadd_contract_op1 # CHECK: [[C:%.*]]:fpr32 = COPY $s2 # CHECK-NEXT: [[B:%.*]]:fpr32 = COPY $s1 # CHECK-NEXT: [[A:%.*]]:fpr32 = COPY $s0 -# CHECK-NEXT: [[MUL:%.*]]:fpr32 = FMULSrr [[B]], [[A]] -# CHECK-NEXT: fpr32 = contract FADDSrr killed [[MUL]], [[C]] +# CHECK-NEXT: :fpr32 = FMADDSrrr [[B]], [[A]], [[C]] --- name: scalar_fmadd_contract_op1 @@ -203,14 +200,13 @@ ... -# Can create FMADD, because both the fmul and fadd have all fast-math flags. +# Can create FMLA, because both the fmul and fadd have all fast-math flags. # # CHECK-LABEL: name: vector_fmadd_fast # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: [[MUL:%.*]]:fpr128 = nnan ninf nsz arcp contract afn reassoc FMULv2f64 [[B]], [[A]] -# CHECK-NEXT: fpr128 = nnan ninf nsz arcp contract afn reassoc FADDv2f64 killed [[MUL]], [[C]] +# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_fast alignment: 4 @@ -243,14 +239,13 @@ ... -# Can create FMADD, because both the fmul and fadd have the contract fast-math flag. +# Can create FMLA, because both the fmul and fadd have the contract fast-math flag. 
# # CHECK-LABEL: name: vector_fmadd_contract # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: [[MUL:%.*]]:fpr128 = contract FMULv2f64 [[B]], [[A]] -# CHECK-NEXT: fpr128 = contract FADDv2f64 killed [[MUL]], [[C]] +# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_contract alignment: 4 @@ -283,7 +278,7 @@ ... -# Do not create FMADD, because we don't have the contract flag on moth instructions. +# Do not create FMLA, because we don't have the contract flag on the FADD. # # CHECK-LABEL: name: vector_fmadd_contract_op0 # CHECK: [[C:%.*]]:fpr128 = COPY $q2 @@ -323,14 +318,13 @@ ... -# Do not create FMADD, because we don't have the contract flag on moth instructions. +# Do create FMLA, because we have the contract flag on the FADD. # # CHECK-LABEL: name: vector_fmadd_contract_op1 # CHECK: [[C:%.*]]:fpr128 = COPY $q2 # CHECK-NEXT: [[B:%.*]]:fpr128 = COPY $q1 # CHECK-NEXT: [[A:%.*]]:fpr128 = COPY $q0 -# CHECK-NEXT: [[MUL:%.*]]:fpr128 = FMULv2f64 [[B]], [[A]] -# CHECK-NEXT: fpr128 = contract FADDv2f64 killed [[MUL]], [[C]] +# CHECK-NEXT: fpr128 = FMLAv2f64 [[C]], [[B]], [[A]] --- name: vector_fmadd_contract_op1 @@ -364,7 +358,7 @@ ... -# Do not create FMADD, as nsz flag does not allow it. +# Do not create FMLA, as nsz flag does not allow it. # # CHECK-LABEL: name: vector_fmadd_nsz # CHECK: [[C:%.*]]:fpr128 = COPY $q2 diff --git a/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll --- a/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll +++ b/llvm/test/CodeGen/AArch64/neon-fma-FMF.ll @@ -1,4 +1,5 @@ ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s +; RUN: llc < %s -O3 -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s define <2 x float> @fma_1(<2 x float> %A, <2 x float> %B, <2 x float> %C) { ; CHECK-LABEL: fma_1: