Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -3899,7 +3899,8 @@ (ins GPRnopc:$Rn, GPRnopc:$Rm), IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{15-12} = 0b0000; let Unpredictable{15-12} = 0b1111; } @@ -3910,14 +3911,16 @@ 4, IIC_iMUL32, [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))], (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6, UseMulOps]>; + Requires<[IsARM, NoV6, UseMulOps]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; } def MLA : AsMul1I32<0b0000001, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra), IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))]>, - Requires<[IsARM, HasV6, UseMulOps]> { + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { bits<4> Ra; let Inst{15-12} = Ra; } @@ -3928,12 +3931,14 @@ pred:$p, cc_out:$s), 4, IIC_iMAC32, [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))], (MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def MLS : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>, - Requires<[IsARM, HasV6T2, UseMulOps]> { + Requires<[IsARM, HasV6T2, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> { bits<4> Rd; bits<4> Rm; bits<4> Rn; @@ -3950,25 +3955,29 @@ def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; + 
Requires<[IsARM, HasV6]>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64, "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in { def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 4, IIC_iMUL64, [], (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s), 4, IIC_iMUL64, [], (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>; } } @@ -3976,17 +3985,20 @@ def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, 
Requires<[IsARM, HasV6]> { + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> { bits<4> RdLo; bits<4> RdHi; bits<4> Rm; @@ -4004,13 +4016,15 @@ 4, IIC_iMAC64, [], (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), 4, IIC_iMAC64, [], (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; } } // hasSideEffects @@ -4019,13 +4033,15 @@ def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{15-12} = 0b1111; } def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{15-12} = 0b1111; } @@ -4033,57 +4049,67 @@ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6, UseMulOps]>; + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def SMMLS : AMul2Ia <0b0111010, 
0b1101, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6, UseMulOps]>; + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; multiclass AI_smul { def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), (sext_inreg GPR:$Rm, i16)))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), (sra GPR:$Rm, (i32 16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), (sext_inreg GPR:$Rm, i16)))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), (sra GPR:$Rm, (i32 16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def WT : AMulxyI<0b0001001, 0b11, (outs 
GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; } @@ -4095,7 +4121,8 @@ [(set GPRnopc:$Rd, (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -4103,7 +4130,8 @@ [(set GPRnopc:$Rd, (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -4111,7 +4139,8 @@ [(set GPRnopc:$Rd, (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), (sext_inreg GPRnopc:$Rm, i16))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -4119,19 +4148,22 @@ [(set GPRnopc:$Rd, (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), (sra GPRnopc:$Rm, (i32 16)))))]>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV5TE, UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV5TE, 
UseMulOps]>; + Requires<[IsARM, HasV5TE, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; } } @@ -4142,22 +4174,26 @@ def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), (ins GPRnopc:$Rn, GPRnopc:$Rm), IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), (ins GPRnopc:$Rn, GPRnopc:$Rm), IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), (ins GPRnopc:$Rn, GPRnopc:$Rm), IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), (ins GPRnopc:$Rn, GPRnopc:$Rm), IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; // Helper class for AI_smld. 
class AMulDualIbase; + NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), - NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">; + NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, - !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">; + !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi), (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary, - !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">; + !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; } @@ -4225,9 +4265,11 @@ multiclass AI_sdml { def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), - NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">; + NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm), - NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">; + NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; } defm SMUA : AI_sdml<0, "smua">; @@ -4239,12 +4281,14 @@ def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, "sdiv", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasDivideInARM]>; + Requires<[IsARM, HasDivideInARM]>, + Sched<[WriteDIV]>; def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV, "udiv", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasDivideInARM]>; + Requires<[IsARM, 
HasDivideInARM]>, + Sched<[WriteDIV]>; //===----------------------------------------------------------------------===// // Misc. Arithmetic Instructions. @@ -5525,20 +5569,26 @@ // smul* and smla* def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b), - (SMULBB GPR:$a, GPR:$b)>; + (SMULBB GPR:$a, GPR:$b)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))), - (SMULBT GPR:$a, GPR:$b)>; + (SMULBT GPR:$a, GPR:$b)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b), - (SMULTB GPR:$a, GPR:$b)>; + (SMULTB GPR:$a, GPR:$b)>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]>; def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, sext_16_node:$b)), - (SMLABB GPR:$a, GPR:$b, GPR:$acc)>; + (SMLABB GPR:$a, GPR:$b, GPR:$acc)>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def : ARMV5MOPat<(add GPR:$acc, (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))), - (SMLABT GPR:$a, GPR:$b, GPR:$acc)>; + (SMLABT GPR:$a, GPR:$b, GPR:$acc)>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def : ARMV5MOPat<(add GPR:$acc, (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)), - (SMLATB GPR:$a, GPR:$b, GPR:$acc)>; + (SMLATB GPR:$a, GPR:$b, GPR:$acc)>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; // Pre-v7 uses MCR for synchronization barriers. def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>, Index: lib/Target/ARM/ARMSchedule.td =================================================================== --- lib/Target/ARM/ARMSchedule.td +++ lib/Target/ARM/ARMSchedule.td @@ -72,8 +72,20 @@ def WriteCMPsi : SchedWrite; def WriteCMPsr : SchedWrite; -// Division. -def WriteDiv : SchedWrite; +// Multiplies. +def WriteMUL32 : SchedWrite; // 32-bit result +def WriteMUL64Lo : SchedWrite; // 64-bit result. Low reg. +def WriteMUL64Hi : SchedWrite; // 64-bit result. High reg. +def ReadMUL : SchedRead; + +// Multiply-accumulates. 
+def WriteMAC32 : SchedWrite; // 32-bit result +def WriteMAC64Lo : SchedWrite; // 64-bit result. Low reg. +def WriteMAC64Hi : SchedWrite; // 64-bit result. High reg. +def ReadMAC : SchedRead; + +// Divisions. +def WriteDIV : SchedWrite; // Loads. def WriteLd : SchedWrite; Index: lib/Target/ARM/ARMScheduleA9.td =================================================================== --- lib/Target/ARM/ARMScheduleA9.td +++ lib/Target/ARM/ARMScheduleA9.td @@ -1944,6 +1944,14 @@ def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; } def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4; let NumMicroOps = 0; } +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : ReadAdvance; +def : ReadAdvance; // Floating-point // Only one FP or AGU instruction may issue per cycle. We model this @@ -1953,6 +1961,7 @@ def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; } def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; } def A9WriteFMAS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; } + def A9WriteFMAD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; } def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; } def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; } @@ -2545,7 +2554,7 @@ "LDRH", "LDRSH", "LDRSB")>; def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>; -def : WriteRes { let Latency = 0; } +def : WriteRes { let Latency = 0; } def : WriteRes; def : WriteRes; Index: lib/Target/ARM/ARMScheduleR52.td =================================================================== --- lib/Target/ARM/ARMScheduleR52.td +++ lib/Target/ARM/ARMScheduleR52.td @@ -70,8 +70,10 @@ def : WriteRes { let Latency = 0; } def : WriteRes { let Latency = 0; } +// Multiply - aliased to sub-target specific later + // Div - may stall 0-9 cycles depending on input (i.e. 
WRI+(0-9)/2) -def : WriteRes { +def : WriteRes { let Latency = 8; let ResourceCycles = [8]; // not pipelined } @@ -90,7 +92,8 @@ // Integer pipeline by-passes def : ReadAdvance; // Operand needed in EX1 stage def : ReadAdvance; // Shift operands needed in ISS - +def : ReadAdvance; +def : ReadAdvance; // Floating-point. Map target-defined SchedReadWrites to subtarget def : WriteRes { let Latency = 6; } @@ -124,7 +127,6 @@ def : ReadAdvance; // mul operand read in F1 def : ReadAdvance; // fp-mac operand read in F1 - //===----------------------------------------------------------------------===// // Subtarget-specific SchedReadWrites. @@ -139,6 +141,9 @@ // Cortex-R52 specific SchedWrites for use with InstRW def R52WriteMAC : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; } +def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> { + let Latency = 4; let NumMicroOps = 0; +} def R52WriteDIV : SchedWriteRes<[R52UnitDiv]> { let Latency = 8; let ResourceCycles = [8]; // not pipelined } @@ -153,6 +158,14 @@ def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; } def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; } +// Alias generics to sub-target specific +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; +def : SchedAlias; + def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; } def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> { let Latency = 4; @@ -266,7 +279,7 @@ "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>; def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS], - (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>; + (instregex "t2SDIV", "t2UDIV")>; // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles. 
// However, that's non-trivial to specify, so we keep it uniform Index: lib/Target/ARM/ARMScheduleSwift.td =================================================================== --- lib/Target/ARM/ARMScheduleSwift.td +++ lib/Target/ARM/ARMScheduleSwift.td @@ -282,6 +282,16 @@ let ResourceCycles = [2, 3]; } + // Aliasing sub-target specific WriteRes to generic ones + def : SchedAlias; + def : SchedAlias; + def : SchedAlias; + def : SchedAlias; + def : SchedAlias; + def : SchedAlias; + def : ReadAdvance; + def : SchedAlias; + // 4.2.15 Integer Multiply Accumulate, Long // 4.2.16 Integer Multiply Accumulate, Dual // 4.2.17 Integer Multiply Accumulate Accumulate, Long @@ -300,7 +310,7 @@ let ResourceCycles = [1, 14]; } // 4.2.18 Integer Divide - def : WriteRes; // Workaround. + def : WriteRes; // Workaround. def : InstRW <[SwiftDiv], (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>; Index: test/CodeGen/ARM/misched-int-basic.ll =================================================================== --- /dev/null +++ test/CodeGen/ARM/misched-int-basic.ll @@ -0,0 +1,45 @@ +; REQUIRES: asserts +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-a9 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \ +; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9 +; RUN: llc < %s -mtriple=arm-eabi -mcpu=swift -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \ +; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT +; RUN: llc < %s -mtriple=arm-eabi -mcpu=cortex-r52 -enable-misched -verify-misched -debug-only=misched -o - 2>&1 > \ +; RUN: /dev/null | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52 +; +; Check the latency of basic int instructions for processors with sched-models +; +; Function Attrs: norecurse nounwind readnone +define i64 @foo(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 { +entry: +; +; CHECK: ********** MI Scheduling ********** +; CHECK: ADDrr +; CHECK_A9: Latency : 1 +; CHECK_SWIFT: 
Latency : 1 +; CHECK_R52: Latency : 3 +; +; CHECK: MUL +; CHECK_SWIFT: Latency : 4 +; CHECK_A9: Latency : 2 +; CHECK_R52: Latency : 4 +; +; CHECK: MLA +; CHECK_SWIFT: Latency : 4 +; CHECK_A9: Latency : 2 +; CHECK_R52: Latency : 4 +; +; CHECK: SMULL +; CHECK_SWIFT: Latency : 5 +; CHECK_A9: Latency : 3 +; CHECK_R52: Latency : 4 +; + %add = add nsw i32 %b, %a + %mul = mul nsw i32 %add, %c + %mul1 = mul nsw i32 %mul, %mul + %add2 = add nsw i32 %mul1, %mul + %conv = sext i32 %add2 to i64 + %mul3 = mul nsw i64 %conv, %conv + ret i64 %mul3 +} + +