Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -357,7 +357,11 @@ def DontUseMovt : Predicate<"!Subtarget->useMovt(*MF)">; def UseMovtInPic : Predicate<"Subtarget->useMovt(*MF) && Subtarget->allowPositionIndependentMovt()">; def DontUseMovtInPic : Predicate<"!Subtarget->useMovt(*MF) || !Subtarget->allowPositionIndependentMovt()">; - def UseFPVMLx : Predicate<"Subtarget->useFPVMLx() || MF->getFunction().optForMinSize()">; + + def UseFPVMLx: Predicate<"(Subtarget->useFPVMLx() &&" + "!(TM.Options.AllowFPOpFusion == FPOpFusion::Fast &&" + " Subtarget->hasVFP4())) ||" + "MF->getFunction().optForMinSize()">; } def UseMulOps : Predicate<"Subtarget->useMulOps()">; @@ -368,10 +372,6 @@ " FPOpFusion::Fast && " " Subtarget->hasVFP4()) && " "!Subtarget->isTargetDarwin()">; -def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion ==" - " FPOpFusion::Fast &&" - " Subtarget->hasVFP4()) || " - "Subtarget->isTargetDarwin()">; def HasFastVGETLNi32 : Predicate<"!Subtarget->hasSlowVGETLNi32()">; def HasSlowVGETLNi32 : Predicate<"Subtarget->hasSlowVGETLNi32()">; Index: lib/Target/ARM/ARMInstrNEON.td =================================================================== --- lib/Target/ARM/ARMInstrNEON.td +++ lib/Target/ARM/ARMInstrNEON.td @@ -4402,16 +4402,16 @@ IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAfd : N3VDMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; + Requires<[HasNEON, UseFPVMLx]>; def VMLAfq : N3VQMulOp<0, 0, 0b00, 0b1101, 1, IIC_VMACQ, "vmla", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; + Requires<[HasNEON, UseFPVMLx]>; def VMLAhd : N3VDMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACD, "vmla", "f16", v4f16, fmul_su, fadd_mlx>, - Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; + Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; def VMLAhq : N3VQMulOp<0, 0, 0b01, 0b1101, 1, IIC_VMACQ, "vmla", "f16", v8f16, fmul_su, fadd_mlx>, - Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; + Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; defm VMLAsl : N3VMulOpSL_HS<0b0000, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmla", "i", add>; def VMLAslfd : N3VDMulOpSL<0b10, 0b0001, IIC_VMACD, "vmla", "f32", @@ -4632,16 +4632,16 @@ IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSfd : N3VDMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; + Requires<[HasNEON, UseFPVMLx]>; def VMLSfq : N3VQMulOp<0, 0, 0b10, 0b1101, 1, IIC_VMACQ, "vmls", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasNEON, UseFPVMLx, DontUseFusedMAC]>; + Requires<[HasNEON, UseFPVMLx]>; def VMLShd : N3VDMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACD, "vmls", "f16", v4f16, fmul, fsub>, - Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; + Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; def VMLShq : N3VQMulOp<0, 0, 0b11, 0b1101, 1, IIC_VMACQ, "vmls", "f16", v8f16, fmul, fsub>, - Requires<[HasNEON, HasFullFP16, UseFPVMLx, DontUseFusedMAC]>; + Requires<[HasNEON, HasFullFP16, UseFPVMLx]>; defm VMLSsl : N3VMulOpSL_HS<0b0100, IIC_VMACi16D, IIC_VMACi32D, IIC_VMACi16Q, IIC_VMACi32Q, "vmls", "i", sub>; def VMLSslfd : N3VDMulOpSL<0b10, 0b0101, IIC_VMACD, "vmls", "f32", @@ -7084,9 +7084,9 @@ def : N3VSPat; def : N3VSPat; def : N3VSMulOpPat, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; def : N3VSMulOpPat, - Requires<[HasNEON, UseNEONForFP, UseFPVMLx, DontUseFusedMAC]>; + Requires<[HasNEON, UseNEONForFP, UseFPVMLx]>; def : N3VSMulOpPat, Requires<[HasVFP4, UseNEONForFP, UseFusedMAC]>; def : N3VSMulOpPat, Index: lib/Target/ARM/ARMInstrVFP.td =================================================================== --- lib/Target/ARM/ARMInstrVFP.td +++ lib/Target/ARM/ARMInstrVFP.td @@ -1814,7 +1814,7 @@ [(set DPR:$Dd, (fadd_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>, Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VMLAS : ASbIn<0b11100, 0b00, 0, 0, @@ -1823,7 +1823,7 @@ [(set SPR:$Sd, (fadd_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>, Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. @@ -1836,17 +1836,17 @@ [(set HPR:$Sd, (fadd_mlx (fmul_su HPR:$Sn, HPR:$Sm), HPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasFullFP16,UseFPVMLx]>; def : Pat<(fadd_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>; def : Pat<(fadd_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,DontUseNEONForFP, UseFPVMLx]>; def : Pat<(fadd_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)), (VMLAH HPR:$dstin, HPR:$a, HPR:$b)>, - Requires<[HasFullFP16,DontUseNEONForFP, UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasFullFP16,DontUseNEONForFP, UseFPVMLx]>; def VMLSD : ADbI<0b11100, 0b00, 1, 0, @@ -1855,7 +1855,7 @@ [(set DPR:$Dd, (fadd_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>, Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VMLSS : ASbIn<0b11100, 0b00, 1, 0, @@ -1864,7 +1864,7 @@ [(set SPR:$Sd, (fadd_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>, Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. @@ -1877,17 +1877,17 @@ [(set HPR:$Sd, (fadd_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)), HPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasFullFP16,UseFPVMLx]>; def : Pat<(fsub_mlx DPR:$dstin, (fmul_su DPR:$a, (f64 DPR:$b))), (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>; def : Pat<(fsub_mlx SPR:$dstin, (fmul_su SPR:$a, SPR:$b)), (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; def : Pat<(fsub_mlx HPR:$dstin, (fmul_su HPR:$a, HPR:$b)), (VMLSH HPR:$dstin, HPR:$a, HPR:$b)>, - Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>; def VNMLAD : ADbI<0b11100, 0b01, 1, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1895,7 +1895,7 @@ [(set DPR:$Dd,(fsub_mlx (fneg (fmul_su DPR:$Dn,DPR:$Dm)), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>, Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VNMLAS : ASbI<0b11100, 0b01, 1, 0, @@ -1904,7 +1904,7 @@ [(set SPR:$Sd, (fsub_mlx (fneg (fmul_su SPR:$Sn, SPR:$Sm)), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>, Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. @@ -1917,29 +1917,29 @@ [(set HPR:$Sd, (fsub_mlx (fneg (fmul_su HPR:$Sn, HPR:$Sm)), HPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasFullFP16,UseFPVMLx]>; // (-(a * b) - dst) -> -(dst + (a * b)) def : Pat<(fsub_mlx (fneg (fmul_su DPR:$a, (f64 DPR:$b))), DPR:$dstin), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>; def : Pat<(fsub_mlx (fneg (fmul_su SPR:$a, SPR:$b)), SPR:$dstin), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; def : Pat<(fsub_mlx (fneg (fmul_su HPR:$a, HPR:$b)), HPR:$dstin), (VNMLAH HPR:$dstin, HPR:$a, HPR:$b)>, - Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>; // (-dst - (a * b)) -> -(dst + (a * b)) def : Pat<(fsub_mlx (fneg DPR:$dstin), (fmul_su DPR:$a, (f64 DPR:$b))), (VNMLAD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>; def : Pat<(fsub_mlx (fneg SPR:$dstin), (fmul_su SPR:$a, SPR:$b)), (VNMLAS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; def : Pat<(fsub_mlx (fneg HPR:$dstin), (fmul_su HPR:$a, HPR:$b)), (VNMLAH HPR:$dstin, HPR:$a, HPR:$b)>, - Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>; def VNMLSD : ADbI<0b11100, 0b01, 0, 0, (outs DPR:$Dd), (ins DPR:$Ddin, DPR:$Dn, DPR:$Dm), @@ -1947,7 +1947,7 @@ [(set DPR:$Dd, (fsub_mlx (fmul_su DPR:$Dn, DPR:$Dm), (f64 DPR:$Ddin)))]>, RegConstraint<"$Ddin = $Dd">, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>, + Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>, Sched<[WriteFPMAC64, ReadFPMAC, ReadFPMUL, ReadFPMUL]>; def VNMLSS : ASbI<0b11100, 0b01, 0, 0, @@ -1955,7 +1955,7 @@ IIC_fpMAC32, "vnmls", ".f32\t$Sd, $Sn, $Sm", [(set SPR:$Sd, (fsub_mlx (fmul_su SPR:$Sn, SPR:$Sm), SPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>, + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>, Sched<[WriteFPMAC32, ReadFPMAC, ReadFPMUL, ReadFPMUL]> { // Some single precision VFP instructions may be executed on both NEON and // VFP pipelines on A8. @@ -1967,17 +1967,17 @@ IIC_fpMAC16, "vnmls", ".f16\t$Sd, $Sn, $Sm", [(set HPR:$Sd, (fsub_mlx (fmul_su HPR:$Sn, HPR:$Sm), HPR:$Sdin))]>, RegConstraint<"$Sdin = $Sd">, - Requires<[HasFullFP16,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasFullFP16,UseFPVMLx]>; def : Pat<(fsub_mlx (fmul_su DPR:$a, (f64 DPR:$b)), DPR:$dstin), (VNMLSD DPR:$dstin, DPR:$a, DPR:$b)>, - Requires<[HasVFP2,HasDPVFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,HasDPVFP,UseFPVMLx]>; def : Pat<(fsub_mlx (fmul_su SPR:$a, SPR:$b), SPR:$dstin), (VNMLSS SPR:$dstin, SPR:$a, SPR:$b)>, - Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasVFP2,DontUseNEONForFP,UseFPVMLx]>; def : Pat<(fsub_mlx (fmul_su HPR:$a, HPR:$b), HPR:$dstin), (VNMLSH HPR:$dstin, HPR:$a, HPR:$b)>, - Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx,DontUseFusedMAC]>; + Requires<[HasFullFP16,DontUseNEONForFP,UseFPVMLx]>; //===----------------------------------------------------------------------===// // Fused FP Multiply-Accumulate Operations. Index: test/CodeGen/ARM/fmacs.ll =================================================================== --- test/CodeGen/ARM/fmacs.ll +++ test/CodeGen/ARM/fmacs.ll @@ -27,10 +27,11 @@ ret float %1 } -define float @vlma_minsize(float %acc, float %a, float %b) #0 { +define float @vmla_minsize(float %acc, float %a, float %b) #0 { entry: -; VMLA-LABEL: vlma_minsize: -; VLMA: vmla.f32 s0, s1, s2 +; VMLA-LABEL: vmla_minsize: +; VMLA: vmla.f32 s0, s1, s2 +; VMLA-NEXT: bx lr %0 = fmul float %a, %b %1 = fadd float %acc, %0