Index: llvm/lib/Target/AArch64/AArch64InstrInfo.td
===================================================================
--- llvm/lib/Target/AArch64/AArch64InstrInfo.td
+++ llvm/lib/Target/AArch64/AArch64InstrInfo.td
@@ -3315,20 +3315,37 @@

 // N.b. FMSUB etc have the accumulator at the *end* of (outs), unlike
 // the NEON variant.
+
+// Here we handle first -(a + b*c) for FNMADD:
+
+let Predicates = [HasNEON, HasFullFP16] in
+def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, FPR16:$Ra)),
+          (FMSUBHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
+
 def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, FPR32:$Ra)),
           (FMSUBSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

 def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, FPR64:$Ra)),
           (FMSUBDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

-// We handled -(a + b*c) for FNMADD above, now it's time for "(-a) + (-b)*c" and
-// "(-a) + b*(-c)".
+// Now it's time for "(-a) + (-b)*c"
+
+let Predicates = [HasNEON, HasFullFP16] in
+def : Pat<(f16 (fma (fneg FPR16:$Rn), FPR16:$Rm, (fneg FPR16:$Ra))),
+          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
+
 def : Pat<(f32 (fma (fneg FPR32:$Rn), FPR32:$Rm, (fneg FPR32:$Ra))),
           (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

 def : Pat<(f64 (fma (fneg FPR64:$Rn), FPR64:$Rm, (fneg FPR64:$Ra))),
           (FNMADDDrrr FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>;

+// And here "(-a) + b*(-c)"
+
+let Predicates = [HasNEON, HasFullFP16] in
+def : Pat<(f16 (fma FPR16:$Rn, (fneg FPR16:$Rm), (fneg FPR16:$Ra))),
+          (FNMADDHrrr FPR16:$Rn, FPR16:$Rm, FPR16:$Ra)>;
+
 def : Pat<(f32 (fma FPR32:$Rn, (fneg FPR32:$Rm), (fneg FPR32:$Ra))),
           (FNMADDSrrr FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>;

Index: llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
+++ llvm/test/CodeGen/AArch64/fp16_intrinsic_lane.ll
@@ -175,8 +175,7 @@

 define dso_local half @t_vfmsh_lane_f16(half %a, half %b, <4 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmsh_lane_f16:
-; CHECK: fneg h1, h1
-; CHECK: fmadd h0, h1, h2, h0
+; CHECK: fmsub h0, h1, h2, h0
 ; CHECK-NEXT: ret
 entry:
   %0 = fsub half 0xH8000, %b
@@ -187,9 +186,8 @@

 define dso_local half @t_vfmsh_laneq_f16(half %a, half %b, <8 x half> %c, i32 %lane) {
 ; CHECK-LABEL: t_vfmsh_laneq_f16:
-; CHECK: fneg h1, h1
-; CHECK-NEXT: fmadd h0, h1, h2, h0
-; CHECK-NEXT: ret
+; CHECK: fmsub h0, h1, h2, h0
+; CHECK-NEXT: ret
 entry:
   %0 = fsub half 0xH8000, %b
   %extract = extractelement <8 x half> %c, i32 0
Index: llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll
===================================================================
--- llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll
+++ llvm/test/CodeGen/AArch64/fp16_intrinsic_scalar_3op.ll
@@ -10,44 +10,83 @@
 }

 define half @fnma16(half %a, half %b, half %c) nounwind readnone ssp {
-entry:
 ; CHECK-LABEL: fnma16:
 ; CHECK: fnmadd h0, h0, h1, h2
+entry:
   %0 = tail call half @llvm.fma.f16(half %a, half %b, half %c)
   %mul = fmul half %0, -1.000000e+00
   ret half %mul
 }

 define half @fms16(half %a, half %b, half %c) nounwind readnone ssp {
-entry:
 ; CHECK-LABEL: fms16:
 ; CHECK: fmsub h0, h0, h1, h2
+entry:
   %mul = fmul half %b, -1.000000e+00
   %0 = tail call half @llvm.fma.f16(half %a, half %mul, half %c)
   ret half %0
 }

 define half @fms16_com(half %a, half %b, half %c) nounwind readnone ssp {
-entry:
 ; CHECK-LABEL: fms16_com:
-
-; FIXME: This should be a fmsub.
-
-; CHECK: fneg h1, h1
-; CHECK-NEXT: fmadd h0, h1, h0, h2
+; CHECK: fmsub h0, h1, h0, h2
+; CHECK-NEXT: ret
+entry:
   %mul = fmul half %b, -1.000000e+00
   %0 = tail call half @llvm.fma.f16(half %mul, half %a, half %c)
   ret half %0
 }

 define half @fnms16(half %a, half %b, half %c) nounwind readnone ssp {
-entry:
 ; CHECK-LABEL: fnms16:
-; CHECK: fnmsub h0, h0, h1, h2
+; CHECK: fnmsub h0, h0, h1, h2
+; CHECK-NEXT: ret
+entry:
   %mul = fmul half %c, -1.000000e+00
   %0 = tail call half @llvm.fma.f16(half %a, half %b, half %mul)
   ret half %0
 }

-declare half @llvm.fma.f16(half, half, half)
+define half @test_fmsub(half %a, half %b, half %c) {
+; CHECK-LABEL: test_fmsub:
+; CHECK: fmsub h0, h0, h1, h2
+; CHECK-NEXT: ret
+entry:
+  %nega = fsub half -0.0, %a
+  %val = call half @llvm.fma.f16(half %nega, half %b, half %c)
+  ret half %val
+}
+
+define half @test_fnmadd(half %a, half %b, half %c) {
+; CHECK-LABEL: test_fnmadd:
+; CHECK: fnmadd h0, h0, h1, h2
+; CHECK-NEXT: ret
+entry:
+  %nega = fsub half -0.0, %a
+  %negc = fsub half -0.0, %c
+  %val = call half @llvm.fma.f16(half %nega, half %b, half %negc)
+  ret half %val
+}
+define half @test_fmadd(half %a, half %b, half %c) {
+; CHECK-LABEL: test_fmadd:
+; CHECK: fmadd h0, h0, h1, h2
+; CHECK-NEXT: ret
+entry:
+  %nega = fsub half -0.0, %a
+  %negb = fsub half -0.0, %b
+  %val = call half @llvm.fma.f16(half %nega, half %negb, half %c)
+  ret half %val
+}
+
+define half @test_fnmsub(half %a, half %b, half %c) {
+; CHECK-LABEL: test_fnmsub:
+; CHECK: fnmsub h0, h0, h1, h2
+; CHECK-NEXT: ret
+entry:
+  %negc = fsub half -0.0, %c
+  %val = call half @llvm.fma.f16(half %a, half %b, half %negc)
+  ret half %val
+}
+
+declare half @llvm.fma.f16(half, half, half)
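
Note on test coverage: the third new f16 pattern, (fma FPR16:$Rn, (fneg FPR16:$Rm), (fneg FPR16:$Ra)) -> FNMADDHrrr, handles the commuted "(-a) + b*(-c)" form, but none of the tests added above call @llvm.fma.f16 with the negations in exactly that operand order. Below is a sketch of one extra test that could be appended to fp16_intrinsic_scalar_3op.ll; the function name test_fnmadd_com is illustrative and not part of the patch, the CHECK line deliberately does not pin down the operand order (instruction selection may commute the multiplicands, in which case the second f16 pattern fires instead, still producing fnmadd), and it reuses the file's existing declare of @llvm.fma.f16.

; Hypothetical extra test (not in the patch): fma(a, -b, -c) should select
; a single scalar fnmadd rather than fneg + fmadd.
define half @test_fnmadd_com(half %a, half %b, half %c) {
; CHECK-LABEL: test_fnmadd_com:
; CHECK: fnmadd
; CHECK-NEXT: ret
entry:
  %negb = fsub half -0.0, %b
  %negc = fsub half -0.0, %c
  %val = call half @llvm.fma.f16(half %a, half %negb, half %negc)
  ret half %val
}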