Index: lib/Target/ARM/ARMInstrInfo.td =================================================================== --- lib/Target/ARM/ARMInstrInfo.td +++ lib/Target/ARM/ARMInstrInfo.td @@ -4076,40 +4076,40 @@ [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), (sext_inreg GPR:$Rm, i16)))]>, Requires<[IsARM, HasV5TE]>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def BT : AMulxyI<0b0001011, 0b10, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "bt"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sext_inreg GPR:$Rn, i16), (sra GPR:$Rm, (i32 16))))]>, Requires<[IsARM, HasV5TE]>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def TB : AMulxyI<0b0001011, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tb"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), (sext_inreg GPR:$Rm, i16)))]>, Requires<[IsARM, HasV5TE]>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def TT : AMulxyI<0b0001011, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "tt"), "\t$Rd, $Rn, $Rm", [(set GPR:$Rd, (mul (sra GPR:$Rn, (i32 16)), (sra GPR:$Rm, (i32 16))))]>, Requires<[IsARM, HasV5TE]>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def WB : AMulxyI<0b0001001, 0b01, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV5TE]>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; def WT : AMulxyI<0b0001001, 0b11, (outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), IIC_iMUL16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV5TE]>, - Sched<[WriteMUL32, ReadMUL, ReadMUL]>; + Sched<[WriteMUL16, ReadMUL, ReadMUL]>; } @@ -4122,7 +4122,7 @@ (mul (sext_inreg GPRnopc:$Rn, i16), (sext_inreg GPRnopc:$Rm, i16))))]>, Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def BT : AMulxyIa<0b0001000, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -4131,7 +4131,7 @@ (add GPR:$Ra, (mul (sext_inreg GPRnopc:$Rn, i16), (sra GPRnopc:$Rm, (i32 16)))))]>, Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def TB : AMulxyIa<0b0001000, 0b01, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -4140,7 +4140,7 @@ (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), (sext_inreg GPRnopc:$Rm, i16))))]>, Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def TT : AMulxyIa<0b0001000, 0b11, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), @@ -4149,21 +4149,21 @@ (add GPR:$Ra, (mul (sra GPRnopc:$Rn, (i32 16)), (sra GPRnopc:$Rm, (i32 16)))))]>, Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def WB : AMulxyIa<0b0001001, 0b00, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wb"), "\t$Rd, $Rn, $Rm, $Ra", []>, Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; def WT : AMulxyIa<0b0001001, 0b10, (outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm, GPR:$Ra), IIC_iMAC16, !strconcat(opc, "wt"), "\t$Rd, $Rn, $Rm, $Ra", []>, Requires<[IsARM, HasV5TE, UseMulOps]>, - Sched<[WriteMAC32, ReadMUL, ReadMUL, ReadMAC]>; + Sched<[WriteMAC16, ReadMUL, ReadMUL, ReadMAC]>; } } Index: lib/Target/ARM/ARMSchedule.td =================================================================== --- lib/Target/ARM/ARMSchedule.td +++ lib/Target/ARM/ARMSchedule.td @@ -73,15 +73,17 @@ def WriteCMPsr : SchedWrite; // Multiplys. -def WriteMUL32 : SchedWrite; // 32-bit result +def WriteMUL16 : SchedWrite; // 16-bit multiply. +def WriteMUL32 : SchedWrite; // 32-bit multiply. def WriteMUL64Lo : SchedWrite; // 64-bit result. Low reg. def WriteMUL64Hi : SchedWrite; // 64-bit result. High reg. def ReadMUL : SchedRead; // Multiply-accumulates. -def WriteMAC32 : SchedWrite; // 32-bit result -def WriteMAC64Lo : SchedWrite; // 64-bit result. Low reg. -def WriteMAC64Hi : SchedWrite; // 64-bit result. High reg. +def WriteMAC16 : SchedWrite; // 16-bit mac. +def WriteMAC32 : SchedWrite; // 32-bit mac. +def WriteMAC64Lo : SchedWrite; // 64-bit mac. Low reg. +def WriteMAC64Hi : SchedWrite; // 64-bit mac. High reg. def ReadMAC : SchedRead; // Divisions. Index: lib/Target/ARM/ARMScheduleA9.td =================================================================== --- lib/Target/ARM/ARMScheduleA9.td +++ lib/Target/ARM/ARMScheduleA9.td @@ -1944,9 +1944,11 @@ def A9WriteM16 : SchedWriteRes<[A9UnitMul]> { let Latency = 3; } def A9WriteM16Hi : SchedWriteRes<[A9UnitMul]> { let Latency = 4; let NumMicroOps = 0; } +def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; +def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; Index: lib/Target/ARM/ARMScheduleR52.td =================================================================== --- lib/Target/ARM/ARMScheduleR52.td +++ lib/Target/ARM/ARMScheduleR52.td @@ -159,9 +159,11 @@ def R52WriteNoRSRC_WRI : SchedWriteRes<[]> { let Latency = 4; } // Alias generics to sub-target specific +def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; +def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; Index: lib/Target/ARM/ARMScheduleSwift.td =================================================================== --- lib/Target/ARM/ARMScheduleSwift.td +++ lib/Target/ARM/ARMScheduleSwift.td @@ -283,9 +283,11 @@ } // Aliasing sub-target specific WriteRes to generic ones + def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; + def : SchedAlias; def : SchedAlias; def : SchedAlias; def : SchedAlias; Index: test/CodeGen/ARM/misched-int-basic.ll =================================================================== --- test/CodeGen/ARM/misched-int-basic.ll +++ test/CodeGen/ARM/misched-int-basic.ll @@ -9,7 +9,7 @@ ; Check the latency of basic int instructions for processors with sched-models ; ; Function Attrs: norecurse nounwind readnone -define i64 @foo(i32 %a, i32 %b, i32 %c) local_unnamed_addr #0 { +define i64 @foo(i32 %a, i32 %b, i16 signext %c) local_unnamed_addr #0 { entry: ; ; CHECK: ********** MI Scheduling ********** @@ -18,6 +18,11 @@ ; CHECK_SWIFT: Latency : 1 ; CHECK_R52: Latency : 3 ; +; CHECK: SMULBB +; CHECK_A9: Latency : 2 +; CHECK_SWIFT: Latency : 4 +; CHECK_R52: Latency : 4 +; ; CHECK: MUL ; CHECK_SWIFT: Latency : 4 ; CHECK_A9: Latency : 2 @@ -33,13 +38,23 @@ ; CHECK_A9: Latency : 3 ; CHECK_R52: Latency : 4 ; +; CHECK: SMLAL +; CHECK_SWIFT: Latency : 7 +; CHECK_A9: Latency : 3 +; CHECK_R52: Latency : 4 +; %add = add nsw i32 %b, %a - %mul = mul nsw i32 %add, %c + %tmp = trunc i32 %add to i16 + %convc = sext i16 %tmp to i32 + %mul0 = mul nsw i32 %convc, %convc + %mul = mul nsw i32 %add, %mul0 %mul1 = mul nsw i32 %mul, %mul %add2 = add nsw i32 %mul1, %mul %conv = sext i32 %add2 to i64 %mul3 = mul nsw i64 %conv, %conv - ret i64 %mul3 + %mul4 = mul nsw i64 %mul3, %mul3 + %add3 = add nsw i64 %mul4, %mul3 + ret i64 %add3 }