diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -292,6 +292,11 @@
 def AArch64fabd_p : PatFrag<(ops node:$pg, node:$op1, node:$op2),
                             (AArch64fabs_mt node:$pg, (AArch64fsub_p node:$pg, node:$op1, node:$op2), undef)>;
 
+// FMAs with a negated multiplication operand can be commuted.
+def AArch64fmls_p : PatFrags<(ops node:$pred, node:$op1, node:$op2, node:$op3),
+                             [(AArch64fma_p node:$pred, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op2, node:$op3),
+                              (AArch64fma_p node:$pred, node:$op2, (AArch64fneg_mt node:$pred, node:$op1, (undef)), node:$op3)]>;
+
 def AArch64fneg_mt_nsz : PatFrag<(ops node:$pred, node:$op, node:$pt),
                                  (AArch64fneg_mt node:$pred, node:$op, node:$pt), [{
   return N->getFlags().hasNoSignedZeros();
@@ -552,7 +557,7 @@
             (!cast<Instruction>("FMLA_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
 
   // Zd = Za + -Zn * Zm
-  def : Pat<(Ty (AArch64fma_p PredTy:$P, (AArch64fneg_mt PredTy:$P, Ty:$Zn, (Ty (undef))), Ty:$Zm, Ty:$Za)),
+  def : Pat<(Ty (AArch64fmls_p PredTy:$P, Ty:$Zn, Ty:$Zm, Ty:$Za)),
             (!cast<Instruction>("FMLS_ZPZZZ_UNDEF_"#Suffix) $P, ZPR:$Za, ZPR:$Zn, ZPR:$Zm)>;
 
   // Zd = -Za + Zn * Zm
diff --git a/llvm/test/CodeGen/AArch64/sve-fp.ll b/llvm/test/CodeGen/AArch64/sve-fp.ll
--- a/llvm/test/CodeGen/AArch64/sve-fp.ll
+++ b/llvm/test/CodeGen/AArch64/sve-fp.ll
@@ -312,6 +312,72 @@
   ret <vscale x 2 x double> %r
 }
 
+define <vscale x 8 x half> @fmls_nxv8f16(<vscale x 8 x half> %a, <vscale x 8 x half> %b, <vscale x 8 x half> %c) {
+; CHECK-LABEL: fmls_nxv8f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.h
+; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 8 x half> %b
+  %r = call <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half> %c, <vscale x 8 x half> %neg, <vscale x 8 x half> %a)
+  ret <vscale x 8 x half> %r
+}
+
+define <vscale x 4 x half> @fmls_nxv4f16(<vscale x 4 x half> %a, <vscale x 4 x half> %b, <vscale x 4 x half> %c) {
+; CHECK-LABEL: fmls_nxv4f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 4 x half> %b
+  %r = call <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half> %c, <vscale x 4 x half> %neg, <vscale x 4 x half> %a)
+  ret <vscale x 4 x half> %r
+}
+
+define <vscale x 2 x half> @fmls_nxv2f16(<vscale x 2 x half> %a, <vscale x 2 x half> %b, <vscale x 2 x half> %c) {
+; CHECK-LABEL: fmls_nxv2f16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmls z0.h, p0/m, z1.h, z2.h
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 2 x half> %b
+  %r = call <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half> %c, <vscale x 2 x half> %neg, <vscale x 2 x half> %a)
+  ret <vscale x 2 x half> %r
+}
+
+define <vscale x 4 x float> @fmls_nxv4f32(<vscale x 4 x float> %a, <vscale x 4 x float> %b, <vscale x 4 x float> %c) {
+; CHECK-LABEL: fmls_nxv4f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 4 x float> %b
+  %r = call <vscale x 4 x float> @llvm.fma.nxv4f32(<vscale x 4 x float> %c, <vscale x 4 x float> %neg, <vscale x 4 x float> %a)
+  ret <vscale x 4 x float> %r
+}
+
+define <vscale x 2 x float> @fmls_nxv2f32(<vscale x 2 x float> %a, <vscale x 2 x float> %b, <vscale x 2 x float> %c) {
+; CHECK-LABEL: fmls_nxv2f32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmls z0.s, p0/m, z1.s, z2.s
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 2 x float> %b
+  %r = call <vscale x 2 x float> @llvm.fma.nxv2f32(<vscale x 2 x float> %c, <vscale x 2 x float> %neg, <vscale x 2 x float> %a)
+  ret <vscale x 2 x float> %r
+}
+
+define <vscale x 2 x double> @fmls_nxv2f64(<vscale x 2 x double> %a, <vscale x 2 x double> %b, <vscale x 2 x double> %c) {
+; CHECK-LABEL: fmls_nxv2f64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    fmls z0.d, p0/m, z1.d, z2.d
+; CHECK-NEXT:    ret
+  %neg = fneg <vscale x 2 x double> %b
+  %r = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %c, <vscale x 2 x double> %neg, <vscale x 2 x double> %a)
+  ret <vscale x 2 x double> %r
+}
+
 define <vscale x 8 x half> @fneg_nxv8f16(<vscale x 8 x half> %a) {
 ; CHECK-LABEL: fneg_nxv8f16:
 ; CHECK:       // %bb.0:
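
For reference, a minimal sketch of the two DAG shapes the new AArch64fmls_p PatFrags matches, written as LLVM IR; the function and value names below are illustrative and not part of the patch. The pre-existing pattern (the removed line in the second hunk) only matched an fma whose first multiplicand is negated; the PatFrags adds the commuted order, which is the shape exercised by the new fmls_* tests above.

; Illustrative only: both multiply-operand orders should now select FMLS.
define <vscale x 2 x double> @fmls_commuted_sketch(<vscale x 2 x double> %acc, <vscale x 2 x double> %x, <vscale x 2 x double> %y) {
  %neg = fneg <vscale x 2 x double> %x
  ; fneg on the first multiplicand: already selected FMLS before this patch.
  %r1 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %neg, <vscale x 2 x double> %y, <vscale x 2 x double> %acc)
  ; fneg on the second multiplicand: matches only via the commuted alternative.
  %r2 = call <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double> %y, <vscale x 2 x double> %neg, <vscale x 2 x double> %r1)
  ret <vscale x 2 x double> %r2
}

declare <vscale x 2 x double> @llvm.fma.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>, <vscale x 2 x double>)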