Index: include/llvm/CodeGen/MachineCombinerPattern.h
===================================================================
--- include/llvm/CodeGen/MachineCombinerPattern.h
+++ include/llvm/CodeGen/MachineCombinerPattern.h
@@ -48,6 +48,8 @@
   FMULADDD_OP2,
   FMULSUBD_OP1,
   FMULSUBD_OP2,
+  FNMULSUBS_OP1,
+  FNMULSUBD_OP1,
   FMLAv1i32_indexed_OP1,
   FMLAv1i32_indexed_OP2,
   FMLAv1i64_indexed_OP1,
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -3166,6 +3166,13 @@
   return canCombine(MBB, MO, MulOpc);
 }
 
+//
+// Is \param MO defined by a floating-point multiply-negate and can be combined?
+static bool canCombineWithFNMUL(MachineBasicBlock &MBB, MachineOperand &MO,
+                                unsigned MulOpc) {
+  return canCombine(MBB, MO, MulOpc);
+}
+
 // TODO: There are many more machine instruction opcodes to match:
 // 1. Other data types (integer, vectors)
 // 2. Other math / logic operations (xor, or)
@@ -3427,6 +3434,10 @@
       Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
       Found = true;
     }
+    if (canCombineWithFNMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
+      Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
+      Found = true;
+    }
     break;
   case AArch64::FSUBDrr:
     if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
@@ -3441,6 +3452,10 @@
       Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
       Found = true;
     }
+    if (canCombineWithFNMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
+      Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
+      Found = true;
+    }
     break;
   case AArch64::FSUBv2f32:
     if (canCombineWithFMUL(MBB, Root.getOperand(2),
@@ -3495,6 +3510,8 @@
   case MachineCombinerPattern::FMULADDD_OP2:
   case MachineCombinerPattern::FMULSUBD_OP1:
   case MachineCombinerPattern::FMULSUBD_OP2:
+  case MachineCombinerPattern::FNMULSUBS_OP1:
+  case MachineCombinerPattern::FNMULSUBD_OP1:
   case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
   case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
   case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
@@ -3996,6 +4013,24 @@
     MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
     break;
   }
+
+  case MachineCombinerPattern::FNMULSUBS_OP1:
+  case MachineCombinerPattern::FNMULSUBD_OP1: {
+    // FNMUL I=A,B,0
+    // FSUB R,I,C
+    // ==> FNMADD R,A,B,C // = -A*B - C
+    // --- Create(FNMADD);
+    if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
+      Opc = AArch64::FNMADDSrrr;
+      RC = &AArch64::FPR32RegClass;
+    } else {
+      Opc = AArch64::FNMADDDrrr;
+      RC = &AArch64::FPR64RegClass;
+    }
+    MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
+    break;
+  }
+
   case MachineCombinerPattern::FMULSUBS_OP2:
   case MachineCombinerPattern::FMULSUBD_OP2: {
     // FMUL I=A,B,0
Index: test/CodeGen/AArch64/arm64-fml-combines.ll
===================================================================
--- test/CodeGen/AArch64/arm64-fml-combines.ll
+++ test/CodeGen/AArch64/arm64-fml-combines.ll
@@ -1,4 +1,5 @@
-; RUN: llc < %s -O=3 -mtriple=arm64-apple-ios -mcpu=cyclone -enable-unsafe-fp-math | FileCheck %s
+; RUN: llc < %s -O3 -mtriple=arm64-apple-ios -mcpu=cyclone -enable-unsafe-fp-math | FileCheck %s
+
 define void @foo_2d(double* %src) {
 entry:
   %arrayidx1 = getelementptr inbounds double, double* %src, i64 5
@@ -126,3 +127,23 @@
 for.end:                                          ; preds = %for.body
   ret void
 }
+
+; CHECK-LABEL: test1:
+; CHECK: fnmadd s0, s0, s1, s2
+define float @test1(float %a, float %b, float %c) {
+entry:
+  %0 = fmul contract float %a, %b
+  %mul = fsub contract float -0.000000e+00, %0
+  %sub1 = fsub contract float %mul, %c
+  ret float %sub1
+}
+
+; CHECK-LABEL: test2:
+; CHECK: fnmadd d0, d0, d1, d2
+define double @test2(double %a, double %b, double %c) {
+entry:
+  %0 = fmul contract double %a, %b
+  %mul = fsub contract double -0.000000e+00, %0
+  %sub1 = fsub contract double %mul, %c
+  ret double %sub1
+}