diff --git a/llvm/lib/Target/ARM/ARMInstrVFP.td b/llvm/lib/Target/ARM/ARMInstrVFP.td --- a/llvm/lib/Target/ARM/ARMInstrVFP.td +++ b/llvm/lib/Target/ARM/ARMInstrVFP.td @@ -2143,6 +2143,9 @@ def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin)), (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; +def : Pat<(f16 (fma (fneg HPR:$Sn), HPR:$Sm, HPR:$Sdin)), + (VFMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>, + Requires<[HasFullFP16]>; // (fma x, (fneg y), z) -> (vfms z, x, y) def : Pat<(f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin)), (VFMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, @@ -2150,6 +2153,9 @@ def : Pat<(f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin)), (VFMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; +def : Pat<(f16 (fma HPR:$Sn, (fneg HPR:$Sm), HPR:$Sdin)), + (VFMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>, + Requires<[HasFullFP16]>; def VFNMAD : ADbI<0b11101, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -2196,6 +2202,9 @@ def : Pat<(fneg (fma (f32 SPR:$Sn), (f32 SPR:$Sm), (f32 SPR:$Sdin))), (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; +def : Pat<(fneg (fma (f16 HPR:$Sn), (f16 HPR:$Sm), (f16 HPR:$Sdin))), + (VFNMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>, + Requires<[HasFullFP16]>; // (fma (fneg x), y, (fneg z)) -> (vfnma z, x, y) def : Pat<(f64 (fma (fneg DPR:$Dn), DPR:$Dm, (fneg DPR:$Ddin))), (VFNMAD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, @@ -2203,6 +2212,9 @@ def : Pat<(f32 (fma (fneg SPR:$Sn), SPR:$Sm, (fneg SPR:$Sdin))), (VFNMAS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; +def : Pat<(f16 (fma (fneg HPR:$Sn), HPR:$Sm, (fneg HPR:$Sdin))), + (VFNMAH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>, + Requires<[HasFullFP16]>; def VFNMSD : ADbI<0b11101, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -2248,6 +2260,9 @@ def : Pat<(f32 (fma SPR:$Sn, SPR:$Sm, (fneg SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; +def : Pat<(f16 (fma HPR:$Sn, HPR:$Sm, (fneg HPR:$Sdin))), + (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>, + Requires<[HasFullFP16]>; // (fneg (fma (fneg x), y, z)) -> (vfnms z, x, y) def : Pat<(fneg (f64 (fma (fneg DPR:$Dn), DPR:$Dm, DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, @@ -2255,6 +2270,9 @@ def : Pat<(fneg (f32 (fma (fneg SPR:$Sn), SPR:$Sm, SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; +def : Pat<(fneg (f16 (fma (fneg HPR:$Sn), HPR:$Sm, HPR:$Sdin))), + (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>, + Requires<[HasFullFP16]>; // (fneg (fma x, (fneg y), z) -> (vfnms z, x, y) def : Pat<(fneg (f64 (fma DPR:$Dn, (fneg DPR:$Dm), DPR:$Ddin))), (VFNMSD DPR:$Ddin, DPR:$Dn, DPR:$Dm)>, @@ -2262,6 +2280,9 @@ def : Pat<(fneg (f32 (fma SPR:$Sn, (fneg SPR:$Sm), SPR:$Sdin))), (VFNMSS SPR:$Sdin, SPR:$Sn, SPR:$Sm)>, Requires<[HasVFP4]>; +def : Pat<(fneg (f16 (fma HPR:$Sn, (fneg HPR:$Sm), HPR:$Sdin))), + (VFNMSH HPR:$Sdin, HPR:$Sn, HPR:$Sm)>, + Requires<[HasFullFP16]>; //===----------------------------------------------------------------------===// // FP Conditional moves. diff --git a/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll b/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll --- a/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll +++ b/llvm/test/CodeGen/ARM/fp16-fusedMAC.ll @@ -230,21 +230,19 @@ ; CHECK-LABEL: fms1: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] -; CHECK-NEXT: vldr.16 s2, [r2] -; CHECK-NEXT: vldr.16 s4, [r0] -; CHECK-NEXT: vneg.f16 s4, s4 -; CHECK-NEXT: vfma.f16 s2, s4, s0 -; CHECK-NEXT: vstr.16 s2, [r0] +; CHECK-NEXT: vldr.16 s2, [r0] +; CHECK-NEXT: vldr.16 s4, [r2] +; CHECK-NEXT: vfms.f16 s4, s2, s0 +; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fms1: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] -; DONT-FUSE-NEXT: vldr.16 s2, [r2] -; DONT-FUSE-NEXT: vldr.16 s4, [r0] -; DONT-FUSE-NEXT: vneg.f16 s4, s4 -; DONT-FUSE-NEXT: vfma.f16 s2, s4, s0 -; DONT-FUSE-NEXT: vstr.16 s2, [r0] +; DONT-FUSE-NEXT: vldr.16 s2, [r0] +; DONT-FUSE-NEXT: vldr.16 s4, [r2] +; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0 +; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 @@ -259,22 +257,20 @@ define arm_aapcs_vfpcc void @fms2(half *%a1, half *%a2, half *%a3) { ; CHECK-LABEL: fms2: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr.16 s0, [r1] -; CHECK-NEXT: vldr.16 s2, [r2] -; CHECK-NEXT: vldr.16 s4, [r0] -; CHECK-NEXT: vneg.f16 s4, s4 -; CHECK-NEXT: vfma.f16 s2, s0, s4 -; CHECK-NEXT: vstr.16 s2, [r0] +; CHECK-NEXT: vldr.16 s0, [r0] +; CHECK-NEXT: vldr.16 s2, [r1] +; CHECK-NEXT: vldr.16 s4, [r2] +; CHECK-NEXT: vfms.f16 s4, s2, s0 +; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fms2: ; DONT-FUSE: @ %bb.0: -; DONT-FUSE-NEXT: vldr.16 s0, [r1] -; DONT-FUSE-NEXT: vldr.16 s2, [r2] -; DONT-FUSE-NEXT: vldr.16 s4, [r0] -; DONT-FUSE-NEXT: vneg.f16 s4, s4 -; DONT-FUSE-NEXT: vfma.f16 s2, s0, s4 -; DONT-FUSE-NEXT: vstr.16 s2, [r0] +; DONT-FUSE-NEXT: vldr.16 s0, [r0] +; DONT-FUSE-NEXT: vldr.16 s2, [r1] +; DONT-FUSE-NEXT: vldr.16 s4, [r2] +; DONT-FUSE-NEXT: vfms.f16 s4, s2, s0 +; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 @@ -292,9 +288,8 @@ ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vldr.16 s4, [r2] -; CHECK-NEXT: vfma.f16 s4, s2, s0 -; CHECK-NEXT: vneg.f16 s0, s4 -; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: vfnma.f16 s4, s2, s0 +; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fnma1: @@ -302,9 +297,8 @@ ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vldr.16 s4, [r2] -; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0 -; DONT-FUSE-NEXT: vneg.f16 s0, s4 -; DONT-FUSE-NEXT: vstr.16 s0, [r0] +; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0 +; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 @@ -321,10 +315,8 @@ ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] -; CHECK-NEXT: vneg.f16 s2, s2 ; CHECK-NEXT: vldr.16 s4, [r2] -; CHECK-NEXT: vneg.f16 s4, s4 -; CHECK-NEXT: vfma.f16 s4, s2, s0 +; CHECK-NEXT: vfnma.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; @@ -332,10 +324,8 @@ ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] -; DONT-FUSE-NEXT: vneg.f16 s2, s2 ; DONT-FUSE-NEXT: vldr.16 s4, [r2] -; DONT-FUSE-NEXT: vneg.f16 s4, s4 -; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0 +; DONT-FUSE-NEXT: vfnma.f16 s4, s2, s0 ; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr @@ -355,8 +345,7 @@ ; CHECK-NEXT: vldr.16 s0, [r1] ; CHECK-NEXT: vldr.16 s2, [r0] ; CHECK-NEXT: vldr.16 s4, [r2] -; CHECK-NEXT: vneg.f16 s4, s4 -; CHECK-NEXT: vfma.f16 s4, s2, s0 +; CHECK-NEXT: vfnms.f16 s4, s2, s0 ; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; @@ -365,8 +354,7 @@ ; DONT-FUSE-NEXT: vldr.16 s0, [r1] ; DONT-FUSE-NEXT: vldr.16 s2, [r0] ; DONT-FUSE-NEXT: vldr.16 s4, [r2] -; DONT-FUSE-NEXT: vneg.f16 s4, s4 -; DONT-FUSE-NEXT: vfma.f16 s4, s2, s0 +; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0 ; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr @@ -383,23 +371,19 @@ ; CHECK-LABEL: fnms2: ; CHECK: @ %bb.0: ; CHECK-NEXT: vldr.16 s0, [r1] -; CHECK-NEXT: vldr.16 s2, [r2] -; CHECK-NEXT: vldr.16 s4, [r0] -; CHECK-NEXT: vneg.f16 s4, s4 -; CHECK-NEXT: vfma.f16 s2, s4, s0 -; CHECK-NEXT: vneg.f16 s0, s2 -; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: vldr.16 s2, [r0] +; CHECK-NEXT: vldr.16 s4, [r2] +; CHECK-NEXT: vfnms.f16 s4, s2, s0 +; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fnms2: ; DONT-FUSE: @ %bb.0: ; DONT-FUSE-NEXT: vldr.16 s0, [r1] -; DONT-FUSE-NEXT: vldr.16 s2, [r2] -; DONT-FUSE-NEXT: vldr.16 s4, [r0] -; DONT-FUSE-NEXT: vneg.f16 s4, s4 -; DONT-FUSE-NEXT: vfma.f16 s2, s4, s0 -; DONT-FUSE-NEXT: vneg.f16 s0, s2 -; DONT-FUSE-NEXT: vstr.16 s0, [r0] +; DONT-FUSE-NEXT: vldr.16 s2, [r0] +; DONT-FUSE-NEXT: vldr.16 s4, [r2] +; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0 +; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2 @@ -415,24 +399,20 @@ define arm_aapcs_vfpcc void @fnms3(half *%a1, half *%a2, half *%a3) { ; CHECK-LABEL: fnms3: ; CHECK: @ %bb.0: -; CHECK-NEXT: vldr.16 s0, [r0] -; CHECK-NEXT: vldr.16 s2, [r2] -; CHECK-NEXT: vldr.16 s4, [r1] -; CHECK-NEXT: vneg.f16 s4, s4 -; CHECK-NEXT: vfma.f16 s2, s0, s4 -; CHECK-NEXT: vneg.f16 s0, s2 -; CHECK-NEXT: vstr.16 s0, [r0] +; CHECK-NEXT: vldr.16 s0, [r1] +; CHECK-NEXT: vldr.16 s2, [r0] +; CHECK-NEXT: vldr.16 s4, [r2] +; CHECK-NEXT: vfnms.f16 s4, s2, s0 +; CHECK-NEXT: vstr.16 s4, [r0] ; CHECK-NEXT: bx lr ; ; DONT-FUSE-LABEL: fnms3: ; DONT-FUSE: @ %bb.0: -; DONT-FUSE-NEXT: vldr.16 s0, [r0] -; DONT-FUSE-NEXT: vldr.16 s2, [r2] -; DONT-FUSE-NEXT: vldr.16 s4, [r1] -; DONT-FUSE-NEXT: vneg.f16 s4, s4 -; DONT-FUSE-NEXT: vfma.f16 s2, s0, s4 -; DONT-FUSE-NEXT: vneg.f16 s0, s2 -; DONT-FUSE-NEXT: vstr.16 s0, [r0] +; DONT-FUSE-NEXT: vldr.16 s0, [r1] +; DONT-FUSE-NEXT: vldr.16 s2, [r0] +; DONT-FUSE-NEXT: vldr.16 s4, [r2] +; DONT-FUSE-NEXT: vfnms.f16 s4, s2, s0 +; DONT-FUSE-NEXT: vstr.16 s4, [r0] ; DONT-FUSE-NEXT: bx lr %f1 = load half, half *%a1, align 2