Index: llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
+++ llvm/trunk/lib/Target/ARM/ARMInstrInfo.td
@@ -3899,7 +3899,8 @@
                   (ins GPRnopc:$Rn, GPRnopc:$Rm),
                   IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm",
                   [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))]>,
-                  Requires<[IsARM, HasV6]> {
+                  Requires<[IsARM, HasV6]>,
+                  Sched<[WriteMUL32, ReadMUL, ReadMUL]> {
   let Inst{15-12} = 0b0000;
   let Unpredictable{15-12} = 0b1111;
 }
@@ -3910,14 +3911,16 @@
                    4, IIC_iMUL32,
                    [(set GPRnopc:$Rd, (mul GPRnopc:$Rn, GPRnopc:$Rm))],
                    (MUL GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, cc_out:$s)>,
-                   Requires<[IsARM, NoV6, UseMulOps]>;
+                   Requires<[IsARM, NoV6, UseMulOps]>,
+                   Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
 }
 
 def MLA  : AsMul1I32<0b0000001, (outs GPRnopc:$Rd),
                      (ins GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra),
                      IIC_iMAC32, "mla", "\t$Rd, $Rn, $Rm, $Ra",
                      [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))]>,
-                     Requires<[IsARM, HasV6, UseMulOps]> {
+                     Requires<[IsARM, HasV6, UseMulOps]>,
+                     Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
   bits<4> Ra;
   let Inst{15-12} = Ra;
 }
@@ -3928,12 +3931,14 @@
                               pred:$p, cc_out:$s),
                    4, IIC_iMAC32,
                    [(set GPRnopc:$Rd, (add (mul GPRnopc:$Rn, GPRnopc:$Rm), GPRnopc:$Ra))],
                    (MLA GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, GPRnopc:$Ra, pred:$p, cc_out:$s)>,
-                   Requires<[IsARM, NoV6]>;
+                   Requires<[IsARM, NoV6]>,
+                   Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
 def MLS  : AMul1I<0b0000011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra),
                   IIC_iMAC32, "mls", "\t$Rd, $Rn, $Rm, $Ra",
                   [(set GPR:$Rd, (sub GPR:$Ra, (mul GPR:$Rn, GPR:$Rm)))]>,
-                  Requires<[IsARM, HasV6T2, UseMulOps]> {
+                  Requires<[IsARM, HasV6T2, UseMulOps]>,
+                  Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]> {
   bits<4> Rd;
   bits<4> Rm;
   bits<4> Rn;
@@ -3950,25 +3955,29 @@
 def SMULL : AsMul1I64<0b0000110, (outs GPR:$RdLo, GPR:$RdHi),
                                  (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
                       "smull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
-                      Requires<[IsARM, HasV6]>;
+                      Requires<[IsARM, HasV6]>,
+                      Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
 
 def UMULL : AsMul1I64<0b0000100, (outs GPR:$RdLo, GPR:$RdHi),
                                  (ins GPR:$Rn, GPR:$Rm), IIC_iMUL64,
                       "umull", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
-                      Requires<[IsARM, HasV6]>;
+                      Requires<[IsARM, HasV6]>,
+                      Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL]>;
 
 let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi" in {
 def SMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
                               (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
                               4, IIC_iMUL64, [],
                               (SMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
-                              Requires<[IsARM, NoV6]>;
+                              Requires<[IsARM, NoV6]>,
+                              Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
 
 def UMULLv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi),
                               (ins GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s),
                               4, IIC_iMUL64, [],
                               (UMULL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, pred:$p, cc_out:$s)>,
-                              Requires<[IsARM, NoV6]>;
+                              Requires<[IsARM, NoV6]>,
+                              Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
 }
 }
@@ -3976,17 +3985,20 @@
 def SMLAL : AsMla1I64<0b0000111, (outs GPR:$RdLo, GPR:$RdHi),
                                  (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
                   IIC_iMAC64, "smlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
-                  RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>;
+                  RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>,
+                  Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
 
 def UMLAL : AsMla1I64<0b0000101, (outs GPR:$RdLo, GPR:$RdHi),
                                  (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi),
IIC_iMAC64, "umlal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>; + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def UMAAL : AMul1I <0b0000010, (outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi), IIC_iMAC64, "umaal", "\t$RdLo, $RdHi, $Rn, $Rm", []>, - RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]> { + RegConstraint<"$RLo = $RdLo, $RHi = $RdHi">, Requires<[IsARM, HasV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]> { bits<4> RdLo; bits<4> RdHi; bits<4> Rm; @@ -4004,13 +4016,15 @@ 4, IIC_iMAC64, [], (SMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; def UMLALv5 : ARMPseudoExpand<(outs GPR:$RdLo, GPR:$RdHi), (ins GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s), 4, IIC_iMAC64, [], (UMLAL GPR:$RdLo, GPR:$RdHi, GPR:$Rn, GPR:$Rm, GPR:$RLo, GPR:$RHi, pred:$p, cc_out:$s)>, - Requires<[IsARM, NoV6]>; + Requires<[IsARM, NoV6]>, + Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>; } } // hasSideEffects @@ -4019,13 +4033,15 @@ def SMMUL : AMul2I <0b0111010, 0b0001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL32, "smmul", "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mulhs GPR:$Rn, GPR:$Rm))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{15-12} = 0b1111; } def SMMULR : AMul2I <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL32, "smmulr", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, + Sched<[WriteMUL32, ReadMUL, ReadMUL]> { let Inst{15-12} = 0b1111; } @@ -4033,57 +4049,67 @@ (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmla", "\t$Rd, $Rn, $Rm, $Ra", [(set GPR:$Rd, (add (mulhs GPR:$Rn, GPR:$Rm), GPR:$Ra))]>, - Requires<[IsARM, HasV6, UseMulOps]>; + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def SMMLAR : AMul2Ia <0b0111010, 0b0011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmlar", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def SMMLS : AMul2Ia <0b0111010, 0b1101, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmls", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6, UseMulOps]>; + Requires<[IsARM, HasV6, UseMulOps]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; def SMMLSR : AMul2Ia <0b0111010, 0b1111, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), IIC_iMAC32, "smmlsr", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]>; + Requires<[IsARM, HasV6]>, + Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; multiclass AI_smul { def BB : AMulxyI<0b0001011, 0b00, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bb"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), (sext_inreg GPR:$Rm, i16)))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), (sra GPR:$Rm, (i32 16))))]>, - Requires<[IsARM, HasV5TE]>; + Requires<[IsARM, HasV5TE]>, + 
+             Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
 
   def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
              IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm",
              [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
                                  (sext_inreg GPR:$Rm, i16)))]>,
-             Requires<[IsARM, HasV5TE]>;
+             Requires<[IsARM, HasV5TE]>,
+             Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
 
   def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
              IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm",
              [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)),
                                  (sra GPR:$Rm, (i32 16))))]>,
-             Requires<[IsARM, HasV5TE]>;
+             Requires<[IsARM, HasV5TE]>,
+             Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
 
   def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
              IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", []>,
-             Requires<[IsARM, HasV5TE]>;
+             Requires<[IsARM, HasV5TE]>,
+             Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
 
   def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm),
              IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", []>,
-             Requires<[IsARM, HasV5TE]>;
+             Requires<[IsARM, HasV5TE]>,
+             Sched<[WriteMUL16, ReadMUL, ReadMUL]>;
 }
@@ -4095,7 +4121,8 @@
             [(set GPRnopc:$Rd, (add GPR:$Ra,
                                (mul (sext_inreg GPRnopc:$Rn, i16),
                                     (sext_inreg GPRnopc:$Rm, i16))))]>,
-            Requires<[IsARM, HasV5TE, UseMulOps]>;
+            Requires<[IsARM, HasV5TE, UseMulOps]>,
+            Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
 
   def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd),
                     (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -4103,7 +4130,8 @@
             [(set GPRnopc:$Rd, (add GPR:$Ra,
                                (mul (sext_inreg GPRnopc:$Rn, i16),
                                     (sra GPRnopc:$Rm, (i32 16)))))]>,
-            Requires<[IsARM, HasV5TE, UseMulOps]>;
+            Requires<[IsARM, HasV5TE, UseMulOps]>,
+            Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
 
   def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd),
                     (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -4111,7 +4139,8 @@
            [(set GPRnopc:$Rd, (add GPR:$Ra,
                               (mul (sra GPRnopc:$Rn, (i32 16)),
                                    (sext_inreg GPRnopc:$Rm, i16))))]>,
-           Requires<[IsARM, HasV5TE, UseMulOps]>;
+           Requires<[IsARM, HasV5TE, UseMulOps]>,
+           Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
 
   def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd),
                     (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
@@ -4119,19 +4148,22 @@
            [(set GPRnopc:$Rd, (add GPR:$Ra,
                               (mul (sra GPRnopc:$Rn, (i32 16)),
                                    (sra GPRnopc:$Rm, (i32 16)))))]>,
-           Requires<[IsARM, HasV5TE, UseMulOps]>;
+           Requires<[IsARM, HasV5TE, UseMulOps]>,
+           Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
 
   def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd),
                     (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
            IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", []>,
-           Requires<[IsARM, HasV5TE, UseMulOps]>;
+           Requires<[IsARM, HasV5TE, UseMulOps]>,
+           Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
 
   def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd),
                     (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
            IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", []>,
-           Requires<[IsARM, HasV5TE, UseMulOps]>;
+           Requires<[IsARM, HasV5TE, UseMulOps]>,
+           Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>;
 }
 }
@@ -4142,22 +4174,26 @@
 def SMLALBB : AMulxyI64<0b0001010, 0b00, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
               (ins GPRnopc:$Rn, GPRnopc:$Rm),
               IIC_iMAC64, "smlalbb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
-              Requires<[IsARM, HasV5TE]>;
+              Requires<[IsARM, HasV5TE]>,
+              Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
 
 def SMLALBT : AMulxyI64<0b0001010, 0b10, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
               (ins GPRnopc:$Rn, GPRnopc:$Rm),
              IIC_iMAC64, "smlalbt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
-             Requires<[IsARM, HasV5TE]>;
+             Requires<[IsARM, HasV5TE]>,
+             Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
 
 def SMLALTB : AMulxyI64<0b0001010, 0b01, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
               (ins GPRnopc:$Rn, GPRnopc:$Rm),
              IIC_iMAC64, "smlaltb", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
-             Requires<[IsARM, HasV5TE]>;
+             Requires<[IsARM, HasV5TE]>,
+             Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
 
 def SMLALTT : AMulxyI64<0b0001010, 0b11, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
               (ins GPRnopc:$Rn, GPRnopc:$Rm),
              IIC_iMAC64, "smlaltt", "\t$RdLo, $RdHi, $Rn, $Rm", []>,
-             Requires<[IsARM, HasV5TE]>;
+             Requires<[IsARM, HasV5TE]>,
+             Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
 
 // Helper class for AI_smld.
 class AMulDualIbase
-                  NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">;
+                  NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm, $Ra">,
+                  Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
 
   def DX: AMulDualIa<0, sub, 1, (outs GPRnopc:$Rd),
                   (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra),
-                  NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">;
+                  NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm, $Ra">,
+                  Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>;
 
   def LD: AMulDualI64<1, sub, 0, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
                   (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
-                  !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">;
+                  !strconcat(opc, "ld"), "\t$RdLo, $RdHi, $Rn, $Rm">,
+                  Sched<[WriteMAC64Lo, WriteMAC64Hi, ReadMUL, ReadMUL, ReadMAC, ReadMAC]>;
 
   def LDX : AMulDualI64<1, sub, 1, (outs GPRnopc:$RdLo, GPRnopc:$RdHi),
                   (ins GPRnopc:$Rn, GPRnopc:$Rm), NoItinerary,
-                  !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">;
+                  !strconcat(opc, "ldx"),"\t$RdLo, $RdHi, $Rn, $Rm">,
+                  Sched<[WriteMUL64Lo, WriteMUL64Hi, ReadMUL, ReadMUL]>;
 
 }
@@ -4225,9 +4265,11 @@
 multiclass AI_sdml<bit sub, string opc> {
   def D:AMulDualI<0, sub, 0, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm),
-                  NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">;
+                  NoItinerary, !strconcat(opc, "d"), "\t$Rd, $Rn, $Rm">,
+                  Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
   def DX:AMulDualI<0, sub, 1, (outs GPRnopc:$Rd),(ins GPRnopc:$Rn, GPRnopc:$Rm),
-                  NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">;
+                  NoItinerary, !strconcat(opc, "dx"), "\t$Rd, $Rn, $Rm">,
+                  Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
 }
 
 defm SMUA : AI_sdml<0, "smua">;
@@ -4239,12 +4281,14 @@
 def SDIV : ADivA1I<0b001, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
                    "sdiv", "\t$Rd, $Rn, $Rm",
                    [(set GPR:$Rd, (sdiv GPR:$Rn, GPR:$Rm))]>,
-                   Requires<[IsARM, HasDivideInARM]>;
+                   Requires<[IsARM, HasDivideInARM]>,
+                   Sched<[WriteDIV]>;
 
 def UDIV : ADivA1I<0b011, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iDIV,
                    "udiv", "\t$Rd, $Rn, $Rm",
                    [(set GPR:$Rd, (udiv GPR:$Rn, GPR:$Rm))]>,
-                   Requires<[IsARM, HasDivideInARM]>;
+                   Requires<[IsARM, HasDivideInARM]>,
+                   Sched<[WriteDIV]>;
 
 //===----------------------------------------------------------------------===//
 // Misc. Arithmetic Instructions.
@@ -5526,20 +5570,26 @@
 // smul* and smla*
 def : ARMV5TEPat<(mul sext_16_node:$a, sext_16_node:$b),
-                 (SMULBB GPR:$a, GPR:$b)>;
+                 (SMULBB GPR:$a, GPR:$b)>,
+                 Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
 def : ARMV5TEPat<(mul sext_16_node:$a, (sra GPR:$b, (i32 16))),
-                 (SMULBT GPR:$a, GPR:$b)>;
+                 (SMULBT GPR:$a, GPR:$b)>,
+                 Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
 def : ARMV5TEPat<(mul (sra GPR:$a, (i32 16)), sext_16_node:$b),
-                 (SMULTB GPR:$a, GPR:$b)>;
+                 (SMULTB GPR:$a, GPR:$b)>,
+                 Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
 def : ARMV5MOPat<(add GPR:$acc,
                       (mul sext_16_node:$a, sext_16_node:$b)),
-                 (SMLABB GPR:$a, GPR:$b, GPR:$acc)>;
+                 (SMLABB GPR:$a, GPR:$b, GPR:$acc)>,
+                 Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
 def : ARMV5MOPat<(add GPR:$acc,
                       (mul sext_16_node:$a, (sra GPR:$b, (i32 16)))),
-                 (SMLABT GPR:$a, GPR:$b, GPR:$acc)>;
+                 (SMLABT GPR:$a, GPR:$b, GPR:$acc)>,
+                 Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
 def : ARMV5MOPat<(add GPR:$acc,
                       (mul (sra GPR:$a, (i32 16)), sext_16_node:$b)),
-                 (SMLATB GPR:$a, GPR:$b, GPR:$acc)>;
+                 (SMLATB GPR:$a, GPR:$b, GPR:$acc)>,
+                 Sched<[WriteMUL32, ReadMUL, ReadMUL]>;
 
 // Pre-v7 uses MCR for synchronization barriers.
 def : ARMPat<(ARMMemBarrierMCR GPR:$zero), (MCR 15, 0, GPR:$zero, 7, 10, 5)>,
Index: llvm/trunk/lib/Target/ARM/ARMSchedule.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMSchedule.td
+++ llvm/trunk/lib/Target/ARM/ARMSchedule.td
@@ -72,8 +72,22 @@
 def WriteCMPsi : SchedWrite;
 def WriteCMPsr : SchedWrite;
 
-// Division.
-def WriteDiv : SchedWrite;
+// Multiplys.
+def WriteMUL16   : SchedWrite; // 16-bit multiply.
+def WriteMUL32   : SchedWrite; // 32-bit multiply.
+def WriteMUL64Lo : SchedWrite; // 64-bit result. Low reg.
+def WriteMUL64Hi : SchedWrite; // 64-bit result. High reg.
+def ReadMUL      : SchedRead;
+
+// Multiply-accumulates.
+def WriteMAC16   : SchedWrite; // 16-bit mac.
+def WriteMAC32   : SchedWrite; // 32-bit mac.
+def WriteMAC64Lo : SchedWrite; // 64-bit mac. Low reg.
+def WriteMAC64Hi : SchedWrite; // 64-bit mac. High reg.
+def ReadMAC      : SchedRead;
+
+// Divisions.
+def WriteDIV     : SchedWrite;
 
 // Loads.
 def WriteLd : SchedWrite;
Index: llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
+++ llvm/trunk/lib/Target/ARM/ARMScheduleA9.td
@@ -1944,6 +1944,16 @@
 def A9WriteM16   : SchedWriteRes<[A9UnitMul]> { let Latency = 3; }
 def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4;
                                                 let NumMicroOps = 0; }
+def : SchedAlias<WriteMUL16, A9WriteM16>;
+def : SchedAlias<WriteMUL32, A9WriteM>;
+def : SchedAlias<WriteMUL64Lo, A9WriteM>;
+def : SchedAlias<WriteMUL64Hi, A9WriteMHi>;
+def : SchedAlias<WriteMAC16, A9WriteM16>;
+def : SchedAlias<WriteMAC32, A9WriteM>;
+def : SchedAlias<WriteMAC64Lo, A9WriteM>;
+def : SchedAlias<WriteMAC64Hi, A9WriteMHi>;
+def : ReadAdvance<ReadMUL, 0>;
+def : ReadAdvance<ReadMAC, 0>;
 
 // Floating-point
 // Only one FP or AGU instruction may issue per cycle.  We model this
@@ -1953,6 +1963,7 @@
 def A9WriteFMulS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 5; }
 def A9WriteFMulD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 6; }
 def A9WriteFMAS  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 8; }
+
 def A9WriteFMAD  : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 9; }
 def A9WriteFDivS : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 15; }
 def A9WriteFDivD : SchedWriteRes<[A9UnitFP, A9UnitAGU]> { let Latency = 25; }
@@ -2545,7 +2556,7 @@
                                      "LDRH", "LDRSH", "LDRSB")>;
 def : InstRW<[A9WriteLbsi], (instregex "LDRrs")>;
 
-def : WriteRes<WriteDiv, []> { let Latency = 0; }
+def : WriteRes<WriteDIV, []> { let Latency = 0; }
 def : WriteRes;
 def : WriteRes;
Index: llvm/trunk/lib/Target/ARM/ARMScheduleR52.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleR52.td
+++ llvm/trunk/lib/Target/ARM/ARMScheduleR52.td
@@ -70,8 +70,10 @@
 def : WriteRes { let Latency = 0; }
 def : WriteRes { let Latency = 0; }
 
+// Multiply - aliased to sub-target specific later
+
 // Div - may stall 0-9 cycles depending on input (i.e. WRI+(0-9)/2)
-def : WriteRes<WriteDiv, [R52UnitDiv]> {
+def : WriteRes<WriteDIV, [R52UnitDiv]> {
   let Latency = 8; let ResourceCycles = [8]; // not pipelined
 }
@@ -90,7 +92,8 @@
 // Integer pipeline by-passes
 def : ReadAdvance; // Operand needed in EX1 stage
 def : ReadAdvance; // Shift operands needed in ISS
-
+def : ReadAdvance;
+def : ReadAdvance;
 
 // Floating-point. Map target-defined SchedReadWrites to subtarget
 def : WriteRes { let Latency = 6; }
@@ -124,7 +127,6 @@
 def : ReadAdvance; // mul operand read in F1
 def : ReadAdvance; // fp-mac operand read in F1
-
 
 //===----------------------------------------------------------------------===//
 // Subtarget-specific SchedReadWrites.
@@ -139,6 +141,9 @@
 
 // Cortex-R52 specific SchedWrites for use with InstRW
 def R52WriteMAC  : SchedWriteRes<[R52UnitMAC]> { let Latency = 4; }
+def R52WriteMACHi : SchedWriteRes<[R52UnitMAC]> {
+  let Latency = 4; let NumMicroOps = 0;
+}
 def R52WriteDIV  : SchedWriteRes<[R52UnitDiv]> {
   let Latency = 8; let ResourceCycles = [8]; // not pipelined
 }
@@ -153,6 +158,16 @@
 def R52WriteNoRSRC_EX2 : SchedWriteRes<[]> { let Latency = 3; }
 def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; }
 
+// Alias generics to sub-target specific
+def : SchedAlias<WriteMUL16, R52WriteMAC>;
+def : SchedAlias<WriteMUL32, R52WriteMAC>;
+def : SchedAlias<WriteMUL64Lo, R52WriteMAC>;
+def : SchedAlias<WriteMUL64Hi, R52WriteMACHi>;
+def : SchedAlias<WriteMAC16, R52WriteMAC>;
+def : SchedAlias<WriteMAC32, R52WriteMAC>;
+def : SchedAlias<WriteMAC64Lo, R52WriteMAC>;
+def : SchedAlias<WriteMAC64Hi, R52WriteMACHi>;
+
 def R52WriteFPALU_F3 : SchedWriteRes<[R52UnitFPALU]> { let Latency = 4; }
 def R52Write2FPALU_F3 : SchedWriteRes<[R52UnitFPALU, R52UnitFPALU]> {
   let Latency = 4;
@@ -266,7 +281,7 @@
                                         "t2SMLSLD", "t2SMLSLDX", "t2UMAAL")>;
 
 def : InstRW <[R52WriteDIV, R52Read_ISS, R52Read_ISS],
-              (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
+              (instregex "t2SDIV", "t2UDIV")>;
 
 // Loads (except POST) with SHL > 2, or ror, require 2 extra cycles.
 // However, that's non-trivial to specify, so we keep it uniform
Index: llvm/trunk/lib/Target/ARM/ARMScheduleSwift.td
===================================================================
--- llvm/trunk/lib/Target/ARM/ARMScheduleSwift.td
+++ llvm/trunk/lib/Target/ARM/ARMScheduleSwift.td
@@ -282,6 +282,18 @@
     let ResourceCycles = [2, 3];
   }
 
+  // Aliasing sub-target specific WriteRes to generic ones
+  def : SchedAlias;
+  def : SchedAlias;
+  def : SchedAlias;
+  def : SchedAlias;
+  def : SchedAlias;
+  def : SchedAlias;
+  def : SchedAlias;
+  def : SchedAlias;
+  def : ReadAdvance;
+  def : SchedAlias;
+
   // 4.2.15 Integer Multiply Accumulate, Long
   // 4.2.16 Integer Multiply Accumulate, Dual
   // 4.2.17 Integer Multiply Accumulate Accumulate, Long
@@ -300,7 +312,7 @@
     let ResourceCycles = [1, 14];
   }
   // 4.2.18 Integer Divide
-  def : WriteRes<WriteDiv, [SwiftUnitDiv]>; // Workaround.
+  def : WriteRes<WriteDIV, [SwiftUnitDiv]>; // Workaround.
 
   def : InstRW <[SwiftDiv],
                 (instregex "SDIV", "UDIV", "t2SDIV", "t2UDIV")>;
Index: llvm/trunk/test/CodeGen/ARM/misched-int-basic.mir
===================================================================
--- llvm/trunk/test/CodeGen/ARM/misched-int-basic.mir
+++ llvm/trunk/test/CodeGen/ARM/misched-int-basic.mir
@@ -0,0 +1,128 @@
+# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=swift -run-pass machine-scheduler -enable-misched -verify-misched \
+# RUN:     -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_SWIFT
+# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-a9 -run-pass machine-scheduler -enable-misched -verify-misched \
+# RUN:     -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_A9
+# RUN: llc -o /dev/null %s -mtriple=arm-eabi -mcpu=cortex-r52 -run-pass machine-scheduler -enable-misched -verify-misched \
+# RUN:     -debug-only=misched 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK_R52
+# REQUIRES: asserts
+--- |
+  ; ModuleID = 'foo.ll'
+  source_filename = "foo.ll"
+  target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+  target triple = "arm---eabi"
+
+  define i64 @foo(i16 signext %a, i16 signext %b) {
+  entry:
+    %d = mul nsw i16 %a, %a
+    %e = mul nsw i16 %b, %b
+    %f = add nuw nsw i16 %e, %d
+    %c = zext i16 %f to i32
+    %mul8 = mul nsw i32 %c, %c
+    %mul9 = mul nsw i32 %mul8, %mul8
+    %add10 = add nuw nsw i32 %mul9, %mul8
+    %conv1130 = zext i32 %add10 to i64
+    %mul12 = mul nuw nsw i64 %conv1130, %conv1130
+    %mul13 = mul nsw i64 %mul12, %mul12
+    %add14 = add nuw nsw i64 %mul13, %mul12
+    ret i64 %add14
+  }
+
+# CHECK: ********** MI Scheduling **********
+# CHECK: SU(2): %vreg2 = SMULBB %vreg1, %vreg1, pred:14, pred:%noreg; GPR:%vreg2,%vreg1,%vreg1
+# CHECK_A9: Latency : 2
+# CHECK_SWIFT: Latency : 4
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(3): %vreg3 = SMLABB %vreg0, %vreg0, %vreg2, pred:14, pred:%noreg; GPRnopc:%vreg3,%vreg0,%vreg0 GPR:%vreg2
+# CHECK_A9: Latency : 2
+# CHECK_SWIFT: Latency : 4
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(4): %vreg4 = UXTH %vreg3, 0, pred:14, pred:%noreg; GPRnopc:%vreg4,%vreg3
+# CHECK_A9: Latency : 1
+# CHECK_SWIFT: Latency : 1
+# CHECK_R52: Latency : 3
+#
+# CHECK: SU(5): %vreg5 = MUL %vreg4, %vreg4, pred:14, pred:%noreg, opt:%noreg; GPRnopc:%vreg5,%vreg4,%vreg4
+# CHECK_A9: Latency : 2
+# CHECK_SWIFT: Latency : 4
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(6): %vreg6 = MLA %vreg5, %vreg5, %vreg5, pred:14, pred:%noreg, opt:%noreg; GPRnopc:%vreg6,%vreg5,%vreg5,%vreg5
+# CHECK_A9: Latency : 2
+# CHECK_SWIFT: Latency : 4
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(7): %vreg7, %vreg8 = UMULL %vreg6, %vreg6, pred:14, pred:%noreg, opt:%noreg; GPRnopc:%vreg7,%vreg8,%vreg6,%vreg6
+# CHECK_A9: Latency : 3
+# CHECK_SWIFT: Latency : 5
+# CHECK_R52: Latency : 4
+#
+# CHECK: SU(11): %vreg13, %vreg14 = UMLAL %vreg6, %vreg6, %vreg13, %vreg14, pred:14, pred:%noreg, opt:%noreg; GPR:%vreg13 GPRnopc:%vreg14,%vreg6,%vreg6
+# CHECK_SWIFT: Latency : 7
+# CHECK_A9: Latency : 3
+# CHECK_R52: Latency : 4
+# CHECK: ** ScheduleDAGMILive::schedule picking next node
+...
+---
+name: foo
+alignment: 2
+exposesReturnsTwice: false
+legalized: false
+regBankSelected: false
+selected: false
+tracksRegLiveness: true
+registers:
+  - { id: 0, class: gprnopc }
+  - { id: 1, class: gpr }
+  - { id: 2, class: gpr }
+  - { id: 3, class: gprnopc }
+  - { id: 4, class: gprnopc }
+  - { id: 5, class: gprnopc }
+  - { id: 6, class: gprnopc }
+  - { id: 7, class: gprnopc }
+  - { id: 8, class: gprnopc }
+  - { id: 9, class: gpr }
+  - { id: 10, class: gprnopc }
+  - { id: 11, class: gprnopc }
+  - { id: 12, class: gprnopc }
+  - { id: 13, class: gpr }
+  - { id: 14, class: gprnopc }
+liveins:
+  - { reg: '%r0', virtual-reg: '%0' }
+  - { reg: '%r1', virtual-reg: '%1' }
+frameInfo:
+  isFrameAddressTaken: false
+  isReturnAddressTaken: false
+  hasStackMap: false
+  hasPatchPoint: false
+  stackSize: 0
+  offsetAdjustment: 0
+  maxAlignment: 0
+  adjustsStack: false
+  hasCalls: false
+  maxCallFrameSize: 0
+  hasOpaqueSPAdjustment: false
+  hasVAStart: false
+  hasMustTailInVarArgFunc: false
+body: |
+  bb.0.entry:
+    liveins: %r0, %r1
+
+    %1 = COPY %r1
+    %0 = COPY %r0
+    %2 = SMULBB %1, %1, 14, _
+    %3 = SMLABB %0, %0, %2, 14, _
+    %4 = UXTH %3, 0, 14, _
+    %5 = MUL %4, %4, 14, _, _
+    %6 = MLA %5, %5, %5, 14, _, _
+    %7, %8 = UMULL %6, %6, 14, _, _
+    %13, %10 = UMULL %7, %7, 14, _, _
+    %11 = MLA %7, %8, %10, 14, _, _
+    %14 = MLA %7, %8, %11, 14, _, _
+    %13, %14 = UMLAL %6, %6, %13, %14, 14, _, _
+    %r0 = COPY %13
+    %r1 = COPY %14
+    BX_RET 14, _, implicit %r0, implicit %r1
+
+...