Index: llvm/trunk/lib/Target/ARM/ARM.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARM.td +++ llvm/trunk/lib/Target/ARM/ARM.td @@ -978,21 +978,27 @@ FeatureHasSlowFPVMLx, FeatureAvoidPartialCPSR]>; -def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m, +def : ProcessorModel<"cortex-m3", CortexM4Model, [ARMv7m, ProcM3, FeaturePrefLoopAlign32, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; -def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m, +def : ProcessorModel<"sc300", CortexM4Model, [ARMv7m, ProcM3, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; -def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em, +def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em, FeatureVFP4, FeatureVFPOnlySP, FeatureD16, FeaturePrefLoopAlign32, FeatureHasSlowFPVMLx, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; def : ProcNoItin<"cortex-m7", [ARMv7em, @@ -1002,22 +1008,26 @@ def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, FeatureNoMovt]>; -def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline, +def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline, FeatureDSP, FeatureFPARMv8, FeatureD16, FeatureVFPOnlySP, FeaturePrefLoopAlign32, FeatureHasSlowFPVMLx, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; -def : ProcessorModel<"cortex-m35p", CortexM3Model, [ARMv8mMainline, +def : ProcessorModel<"cortex-m35p", CortexM4Model, [ARMv8mMainline, FeatureDSP, FeatureFPARMv8, FeatureD16, FeatureVFPOnlySP, FeaturePrefLoopAlign32, FeatureHasSlowFPVMLx, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; Index: llvm/trunk/lib/Target/ARM/ARMInstrThumb.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb.td +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb.td @@ -663,7 +663,7 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, "ldr", "\t$Rt, $addr", [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, - T1Encoding<{0,1,0,0,1,?}> { + T1Encoding<{0,1,0,0,1,?}>, Sched<[WriteLd]> { // A6.2 & A8.6.59 bits<3> Rt; bits<8> addr; @@ -677,7 +677,7 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, "ldr", "\t$Rt, $addr", [(set tGPR:$Rt, (load t_addrmode_sp:$addr))]>, - T1LdStSP<{1,?,?}> { + T1LdStSP<{1,?,?}>, Sched<[WriteLd]> { bits<3> Rt; bits<8> addr; let Inst{10-8} = Rt; @@ -728,39 +728,39 @@ defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr, t_addrmode_is4, AddrModeT1_4, IIC_iLoad_r, IIC_iLoad_i, "ldr", - load>; + load>, Sched<[WriteLd]>; // A8.6.64 & A8.6.61 defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr, t_addrmode_is1, AddrModeT1_1, IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb", - zextloadi8>; + zextloadi8>, Sched<[WriteLd]>; // A8.6.76 & A8.6.73 defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr, t_addrmode_is2, AddrModeT1_2, IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh", - zextloadi16>; + zextloadi16>, Sched<[WriteLd]>; let AddedComplexity = 10 in def tLDRSB : // A8.6.80 T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr_sext:$addr), AddrModeT1_1, IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr", - [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr_sext:$addr))]>; + [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr_sext:$addr))]>, Sched<[WriteLd]>; let AddedComplexity = 10 in def tLDRSH : // A8.6.84 T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr_sext:$addr), AddrModeT1_2, IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr", - [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr_sext:$addr))]>; + [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr_sext:$addr))]>, Sched<[WriteLd]>; def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, "str", "\t$Rt, $addr", [(store tGPR:$Rt, t_addrmode_sp:$addr)]>, - T1LdStSP<{0,?,?}> { + T1LdStSP<{0,?,?}>, Sched<[WriteST]> { bits<3> Rt; bits<8> addr; let Inst{10-8} = Rt; @@ -771,19 +771,19 @@ defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr, t_addrmode_is4, AddrModeT1_4, IIC_iStore_r, IIC_iStore_i, "str", - store>; + store>, Sched<[WriteST]>; // A8.6.197 & A8.6.195 defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr, t_addrmode_is1, AddrModeT1_1, IIC_iStore_bh_r, IIC_iStore_bh_i, "strb", - truncstorei8>; + truncstorei8>, Sched<[WriteST]>; // A8.6.207 & A8.6.205 defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr, t_addrmode_is2, AddrModeT1_2, IIC_iStore_bh_r, IIC_iStore_bh_i, "strh", - truncstorei16>; + truncstorei16>, Sched<[WriteST]>; //===----------------------------------------------------------------------===// @@ -843,7 +843,7 @@ def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), IIC_iPop, "pop${p}\t$regs", []>, - T1Misc<{1,1,0,?,?,?,?}> { + T1Misc<{1,1,0,?,?,?,?}>, Sched<[WriteLd]> { bits<16> regs; let Inst{8} = regs{15}; let Inst{7-0} = regs{7-0}; @@ -853,7 +853,7 @@ def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), IIC_iStore_m, "push${p}\t$regs", []>, - T1Misc<{0,1,0,?,?,?,?}> { + T1Misc<{0,1,0,?,?,?,?}>, Sched<[WriteST]> { bits<16> regs; let Inst{8} = regs{14}; let Inst{7-0} = regs{7-0}; @@ -1214,7 +1214,7 @@ Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2, IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd", [(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>, - T1DataProcessing<0b1101> { + T1DataProcessing<0b1101>, Sched<[WriteMUL32, ReadMUL, ReadMUL]> { bits<3> Rd; bits<3> Rn; let Inst{5-3} = Rn; Index: llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td +++ llvm/trunk/lib/Target/ARM/ARMInstrThumb2.td @@ -1333,7 +1333,8 @@ def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), @@ -2331,14 +2332,14 @@ def t2SSAT: T2SatI<(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), "ssat", "\t$Rd, $sat_imm, $Rn$sh">, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteALU]> { let Inst{23-22} = 0b00; let Inst{5} = 0; } def t2SSAT16: T2SatI<(ins imm1_16:$sat_imm, rGPR:$Rn), "ssat16", "\t$Rd, $sat_imm, $Rn">, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, Sched<[WriteALU]> { let Inst{23-22} = 0b00; let sh = 0b100000; let Inst{4} = 0; @@ -2346,13 +2347,13 @@ def t2USAT: T2SatI<(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), "usat", "\t$Rd, $sat_imm, $Rn$sh">, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteALU]> { let Inst{23-22} = 0b10; } def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn), "usat16", "\t$Rd, $sat_imm, $Rn">, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, Sched<[WriteALU]> { let Inst{23-22} = 0b10; let sh = 0b100000; let Inst{4} = 0; @@ -2476,7 +2477,7 @@ let Constraints = "$src = $Rd" in def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm), IIC_iUNAsi, "bfc", "\t$Rd, $imm", - [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> { + [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; // should be 0. let Inst{25} = 1; @@ -2492,7 +2493,7 @@ def t2SBFX: T2TwoRegBitFI< (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb), - IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> { + IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b10100; @@ -2501,7 +2502,7 @@ def t2UBFX: T2TwoRegBitFI< (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb), - IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> { + IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b11100; @@ -2527,7 +2528,7 @@ (ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm), IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm", [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn, - bf_inv_mask_imm:$imm))]> { + bf_inv_mask_imm:$imm))]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; // should be 0. let Inst{25} = 1; @@ -3281,17 +3282,17 @@ AddrModeNone, 4, NoItinerary, "ldrexb", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]>; def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexh", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]>; def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr), AddrModeT2_ldrex, 4, NoItinerary, "ldrex", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]> { + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]> { bits<4> Rt; bits<12> addr; let Inst{31-27} = 0b11101; @@ -3307,7 +3308,7 @@ AddrModeNone, 4, NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, - Requires<[IsThumb2, IsNotMClass]> { + Requires<[IsThumb2, IsNotMClass]>, Sched<[WriteLd]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3315,17 +3316,17 @@ AddrModeNone, 4, NoItinerary, "ldaexb", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]>; def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaexh", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]>; def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaex", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> { + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]> { bits<4> Rt; bits<4> addr; let Inst{31-27} = 0b11101; @@ -3341,7 +3342,7 @@ AddrModeNone, 4, NoItinerary, "ldaexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, Requires<[IsThumb, - HasAcquireRelease, HasV7Clrex, IsNotMClass]> { + HasAcquireRelease, HasV7Clrex, IsNotMClass]>, Sched<[WriteLd]> { bits<4> Rt2; let Inst{11-8} = Rt2; @@ -3356,14 +3357,14 @@ "strexb", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_1 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]>; def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexh", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_2 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]>; def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_imm0_1020s4:$addr), @@ -3371,7 +3372,7 @@ "strex", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]> { + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]> { bits<4> Rd; bits<4> Rt; bits<12> addr; @@ -3388,7 +3389,7 @@ AddrModeNone, 4, NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, - Requires<[IsThumb2, IsNotMClass]> { + Requires<[IsThumb2, IsNotMClass]>, Sched<[WriteST]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3399,7 +3400,7 @@ [(set rGPR:$Rd, (stlex_1 rGPR:$Rt, addr_offset_none:$addr))]>, Requires<[IsThumb, HasAcquireRelease, - HasV7Clrex]>; + HasV7Clrex]>, Sched<[WriteST]>; def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), @@ -3408,7 +3409,7 @@ [(set rGPR:$Rd, (stlex_2 rGPR:$Rt, addr_offset_none:$addr))]>, Requires<[IsThumb, HasAcquireRelease, - HasV7Clrex]>; + HasV7Clrex]>, Sched<[WriteST]>; def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), @@ -3416,7 +3417,8 @@ "stlex", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (stlex_4 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> { + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, + Sched<[WriteST]> { bits<4> Rd; bits<4> Rt; bits<4> addr; @@ -3433,7 +3435,7 @@ AddrModeNone, 4, NoItinerary, "stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, Requires<[IsThumb, HasAcquireRelease, - HasV7Clrex, IsNotMClass]> { + HasV7Clrex, IsNotMClass]>, Sched<[WriteST]> { bits<4> Rt2; let Inst{11-8} = Rt2; } Index: llvm/trunk/lib/Target/ARM/ARMSchedule.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMSchedule.td +++ llvm/trunk/lib/Target/ARM/ARMSchedule.td @@ -424,4 +424,4 @@ include "ARMScheduleSwift.td" include "ARMScheduleR52.td" include "ARMScheduleA57.td" -include "ARMScheduleM3.td" +include "ARMScheduleM4.td" Index: llvm/trunk/lib/Target/ARM/ARMScheduleM3.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMScheduleM3.td +++ llvm/trunk/lib/Target/ARM/ARMScheduleM3.td @@ -1,20 +0,0 @@ -//=- ARMScheduleM3.td - ARM Cortex-M3 Scheduling Definitions -*- tablegen -*-=// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for the ARM Cortex-M3 processor. -// -//===----------------------------------------------------------------------===// - -def CortexM3Model : SchedMachineModel { - let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue - let MicroOpBufferSize = 0; // In-order - let LoadLatency = 2; // Latency when not pipelined, not pc-relative - let MispredictPenalty = 2; // Best case branch taken cost - - let CompleteModel = 0; -} Index: llvm/trunk/lib/Target/ARM/ARMScheduleM4.td =================================================================== --- llvm/trunk/lib/Target/ARM/ARMScheduleM4.td +++ llvm/trunk/lib/Target/ARM/ARMScheduleM4.td @@ -0,0 +1,119 @@ +//==- ARMScheduleM4.td - Cortex-M4 Scheduling Definitions -*- tablegen -*-====// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file defines the SchedRead/Write data for the ARM Cortex-M4 processor. +// +//===----------------------------------------------------------------------===// + +def CortexM4Model : SchedMachineModel { + let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue + let MicroOpBufferSize = 0; // In-order + let LoadLatency = 2; // Latency when not pipelined, not pc-relative + let MispredictPenalty = 2; // Best case branch taken cost + let PostRAScheduler = 1; + + let CompleteModel = 0; +} + + +// We model the entire cpu as a single pipeline with a BufferSize = 0 since +// Cortex-M4 is in-order. + +def M4Unit : ProcResource<1> { let BufferSize = 0; } + + +let SchedModel = CortexM4Model in { + +// Some definitions of latencies we apply to different instructions + +class M4UnitL1 : WriteRes { let Latency = 1; } +class M4UnitL2 : WriteRes { let Latency = 2; } +class M4UnitL3 : WriteRes { let Latency = 3; } +class M4UnitL14 : WriteRes { let Latency = 14; } +def M4UnitL1_wr : SchedWriteRes<[M4Unit]> { let Latency = 1; } +def M4UnitL2_wr : SchedWriteRes<[M4Unit]> { let Latency = 2; } +class M4UnitL1I : InstRW<[M4UnitL1_wr], instr>; +class M4UnitL2I : InstRW<[M4UnitL2_wr], instr>; + + +// Loads, MAC's and DIV all get a higher latency of 2 +def : M4UnitL2; +def : M4UnitL2; +def : M4UnitL2; +def : M4UnitL2; +def : M4UnitL2; +def : M4UnitL2; + +def : M4UnitL2I<(instregex "(t|t2)LDM")>; + + +// Stores we use a latency of 1 as they have no outputs + +def : M4UnitL1; +def : M4UnitL1I<(instregex "(t|t2)STM")>; + + +// Everything else has a Latency of 1 + +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1I<(instregex "(t|t2)MOV")>; +def : M4UnitL1I<(instrs COPY)>; +def : M4UnitL1I<(instregex "t2IT")>; +def : M4UnitL1I<(instregex "t2SEL", "t2USAD8", + "t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)", "t2USADA8", "(t|t2)REV")>; + +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; +def : ReadAdvance; + +// Most FP instructions are single-cycle latency, except MAC's, Div's and Sqrt's. +// Loads still take 2 cycles. + +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL2I<(instregex "VLD")>; +def : M4UnitL1I<(instregex "VST")>; +def : M4UnitL3; +def : M4UnitL3; +def : M4UnitL14; +def : M4UnitL14; +def : M4UnitL14; +def : M4UnitL14; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; +def : M4UnitL1; + +def : ReadAdvance; +def : ReadAdvance; + +} Index: llvm/trunk/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll +++ llvm/trunk/test/CodeGen/ARM/ParallelDSP/multi-use-loads.ll @@ -5,10 +5,10 @@ ; CHECK-LABEL: add_user ; CHECK: %for.body -; CHECK: ldr [[A:r[0-9]+]],{{.*}}, #2]! -; CHECK: ldr [[B:r[0-9]+]],{{.*}}, #2]! -; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]] +; CHECK: ldr [[A:[rl0-9]+]],{{.*}}, #2]! +; CHECK: ldr [[B:[rl0-9]+]],{{.*}}, #2]! ; CHECK: sxtah [[COUNT:r[0-9]+]], [[COUNT]], [[A]] +; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]] define i32 @add_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: %cmp24 = icmp sgt i32 %arg, 0 @@ -53,10 +53,10 @@ ; CHECK-LABEL: mul_bottom_user ; CHECK: %for.body -; CHECK: ldr [[A:r[0-9]+]],{{.*}}, #2]! -; CHECK: ldr [[B:r[0-9]+]],{{.*}}, #2]! -; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]] +; CHECK: ldr [[A:[rl0-9]+]],{{.*}}, #2]! +; CHECK: ldr [[B:[rl0-9]+]],{{.*}}, #2]! ; CHECK: sxth [[SXT:r[0-9]+]], [[A]] +; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]] ; CHECK: mul [[COUNT:r[0-9]+]],{{.*}}[[SXT]] define i32 @mul_bottom_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: @@ -104,8 +104,8 @@ ; CHECK: %for.body ; CHECK: ldr [[A:[rl0-9]+]],{{.*}}, #2]! ; CHECK: ldr [[B:[rl0-9]+]],{{.*}}, #2]! -; CHECK: smlad [[ACC:[rl0-9]+]], [[B]], [[A]], [[ACC]] -; CHECK: asr.w [[ASR:[rl0-9]+]], [[B]], #16 +; CHECK: asrs [[ASR:[rl0-9]+]], [[A]], #16 +; CHECK: smlad [[ACC:[rl0-9]+]], [[A]], [[B]], [[ACC]] ; CHECK: mul [[COUNT:[rl0-9]+]],{{.}}[[ASR]] define i32 @mul_top_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: @@ -151,10 +151,10 @@ ; CHECK-LABEL: and_user ; CHECK: %for.body -; CHECK: ldr [[A:r[0-9]+]],{{.*}}, #2]! -; CHECK: ldr [[B:r[0-9]+]],{{.*}}, #2]! -; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]] +; CHECK: ldr [[A:[rl0-9]+]],{{.*}}, #2]! +; CHECK: ldr [[B:[rl0-9]+]],{{.*}}, #2]! ; CHECK: uxth [[UXT:r[0-9]+]], [[A]] +; CHECK: smlad [[ACC:r[0-9]+]], [[B]], [[A]], [[ACC]] ; CHECK: mul [[MUL:r[0-9]+]],{{.*}}[[UXT]] define i32 @and_user(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: @@ -201,12 +201,12 @@ ; CHECK-LABEL: multi_uses ; CHECK: %for.body -; CHECK: ldr [[A:r[0-9]+]], [{{.*}}, #2]! -; CHECK: ldr [[B:r[0-9]+]], [{{.*}}, #2]! -; CHECK: smlad [[ACC:[rl0-9]+]], [[B]], [[A]], [[ACC]] +; CHECK: ldr [[A:[rl0-9]+]], [{{.*}}, #2]! +; CHECK: ldr [[B:[rl0-9]+]], [{{.*}}, #2]! ; CHECK: sxth [[SXT:r[0-9]+]], [[A]] +; CHECK: smlad [[ACC:[rl0-9]+]], [[B]], [[A]], [[ACC]] ; CHECK: eor.w [[EOR:r[0-9]+]], [[SXT]], [[SHIFT:r[0-9]+]] -; CHECK: mul [[MUL:r[0-9]+]],{{.*}}[[SXT]] +; CHECK: muls [[MUL:r[0-9]+]],{{.*}}[[SXT]] ; CHECK: lsl.w [[SHIFT]], [[MUL]], #16 define i32 @multi_uses(i32 %arg, i32* nocapture readnone %arg1, i16* nocapture readonly %arg2, i16* nocapture readonly %arg3) { entry: Index: llvm/trunk/test/CodeGen/ARM/aapcs-hfa-code.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/aapcs-hfa-code.ll +++ llvm/trunk/test/CodeGen/ARM/aapcs-hfa-code.ll @@ -76,8 +76,8 @@ ; CHECK-M4F-LABEL: test_1double_nosplit: ; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 -; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: movt [[ONEHI]], #16368 +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp] ; CHECK-M4F: bl test_1double_nosplit call arm_aapcs_vfpcc void @test_1double_nosplit([4 x float] undef, [4 x double] undef, [3 x float] undef, double 1.0) @@ -97,8 +97,8 @@ ; CHECK-M4F-LABEL: test_1double_misaligned: ; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 -; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: movt [[ONEHI]], #16368 +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp, #8] ; CHECK-M4F: bl test_1double_misaligned Index: llvm/trunk/test/CodeGen/ARM/useaa.ll =================================================================== --- llvm/trunk/test/CodeGen/ARM/useaa.ll +++ llvm/trunk/test/CodeGen/ARM/useaa.ll @@ -1,4 +1,6 @@ ; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-r52 | FileCheck %s --check-prefix=CHECK --check-prefix=USEAA +; RUN: llc < %s -mtriple=armv7m-eabi -mcpu=cortex-m4 | FileCheck %s --check-prefix=CHECK --check-prefix=USEAA +; RUN: llc < %s -mtriple=armv8m-eabi -mcpu=cortex-m33 | FileCheck %s --check-prefix=CHECK --check-prefix=USEAA ; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=generic | FileCheck %s --check-prefix=CHECK --check-prefix=GENERIC ; Check we use AA during codegen, so can interleave these loads/stores. Index: llvm/trunk/test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll +++ llvm/trunk/test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll @@ -100,10 +100,10 @@ ; CHECK-BP: str ; CHECK-BP: b ; CHECK-BP: str -; CHECK-BP: ldr +; CHECK-BP: add ; CHECK-NOBP: ittee ; CHECK-NOBP: streq -; CHECK-NOBP: ldreq +; CHECK-NOBP: addeq ; CHECK-NOBP: strne ; CHECK-NOBP: strne define i32 @diamond2(i32 %n, i32* %p, i32* %q) { @@ -119,7 +119,7 @@ if.else: store i32 %n, i32* %q, align 4 - %0 = load i32, i32* %p, align 4 + %0 = add i32 %n, 10 br label %if.end if.end: Index: llvm/trunk/test/CodeGen/Thumb2/m4-sched-ldr.mir =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/m4-sched-ldr.mir +++ llvm/trunk/test/CodeGen/Thumb2/m4-sched-ldr.mir @@ -0,0 +1,60 @@ +# RUN: llc %s -run-pass machine-scheduler -o - | FileCheck %s + +# CHECK-LABEL: bb.0. +# CHECK: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2ADDri +# CHECK-NEXT: t2ADDri +--- | + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv7em-arm-none-eabi" + + ; Function Attrs: norecurse nounwind optsize readonly + define dso_local i32 @test(i32* nocapture readonly %a, i32* nocapture readonly %b) local_unnamed_addr #0 { + entry: + %0 = load i32, i32* %a, align 4 + %add = add nsw i32 %0, 10 + %1 = load i32, i32* %b, align 4 + %add1 = add nsw i32 %1, 20 + %mul = mul nsw i32 %add1, %add + ret i32 %mul + } + + attributes #0 = { "target-cpu"="cortex-m4" } + +... +--- +name: test +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr, preferred-register: '' } + - { id: 1, class: gpr, preferred-register: '' } + - { id: 2, class: gprnopc, preferred-register: '' } + - { id: 3, class: rgpr, preferred-register: '' } + - { id: 4, class: gprnopc, preferred-register: '' } + - { id: 5, class: rgpr, preferred-register: '' } + - { id: 6, class: rgpr, preferred-register: '' } +liveins: + - { reg: '$r0', virtual-reg: '%0' } + - { reg: '$r1', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $r0, $r1 + + %1:gpr = COPY $r1 + %0:gpr = COPY $r0 + %2:gprnopc = t2LDRi12 %0, 0, 14, $noreg :: (load 4 from %ir.a) + %3:rgpr = nsw t2ADDri %2, 10, 14, $noreg, $noreg + %4:gprnopc = t2LDRi12 %1, 0, 14, $noreg :: (load 4 from %ir.b) + %5:rgpr = nsw t2ADDri %4, 20, 14, $noreg, $noreg + %6:rgpr = nsw t2MUL %5, %3, 14, $noreg + $r0 = COPY %6 + tBX_RET 14, $noreg, implicit $r0 + +... Index: llvm/trunk/test/CodeGen/Thumb2/m4-sched-regs.ll =================================================================== --- llvm/trunk/test/CodeGen/Thumb2/m4-sched-regs.ll +++ llvm/trunk/test/CodeGen/Thumb2/m4-sched-regs.ll @@ -0,0 +1,52 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc %s -o - | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv7em-arm-none-eabi" + +%struct.a = type { i32, %struct.b*, i8, i8, i8, i8, i8*, %struct.b*, i16, i16, i16, i16, i16, i16, i16, i16, i32, i32, i32, i32, i32, i32, i32 } +%struct.b = type { i8, i8, i8, i8, i32, i16, i16, i32, i32, i32, i32, [16 x i8], [64 x i8], [128 x i8], i32, [68 x i8] } + +define void @test(%struct.a* nocapture %dhcp, i16 zeroext %value) #0 { +; CHECK-LABEL: test: +; CHECK: @ %bb.0: @ %entry +; CHECK-NEXT: .save {r7, lr} +; CHECK-NEXT: push {r7, lr} +; CHECK-NEXT: ldrh r3, [r0, #20] +; CHECK-NEXT: ldr.w lr, [r0, #16] +; CHECK-NEXT: lsr.w r12, r1, #8 +; CHECK-NEXT: adds r2, r3, #1 +; CHECK-NEXT: strh r2, [r0, #20] +; CHECK-NEXT: add.w r2, lr, r3 +; CHECK-NEXT: strb.w r12, [r2, #240] +; CHECK-NEXT: ldrh r2, [r0, #20] +; CHECK-NEXT: ldr.w r12, [r0, #16] +; CHECK-NEXT: adds r3, r2, #1 +; CHECK-NEXT: strh r3, [r0, #20] +; CHECK-NEXT: add.w r0, r12, r2 +; CHECK-NEXT: strb.w r1, [r0, #240] +; CHECK-NEXT: pop {r7, pc} +entry: + %shr = lshr i16 %value, 8 + %conv1 = trunc i16 %shr to i8 + %msg_out = getelementptr inbounds %struct.a, %struct.a* %dhcp, i32 0, i32 7 + %0 = load %struct.b*, %struct.b** %msg_out, align 4 + %options_out_len = getelementptr inbounds %struct.a, %struct.a* %dhcp, i32 0, i32 8 + %1 = load i16, i16* %options_out_len, align 4 + %inc = add i16 %1, 1 + store i16 %inc, i16* %options_out_len, align 4 + %idxprom = zext i16 %1 to i32 + %arrayidx = getelementptr inbounds %struct.b, %struct.b* %0, i32 0, i32 15, i32 %idxprom + store i8 %conv1, i8* %arrayidx, align 1 + %conv4 = trunc i16 %value to i8 + %2 = load %struct.b*, %struct.b** %msg_out, align 4 + %3 = load i16, i16* %options_out_len, align 4 + %inc8 = add i16 %3, 1 + store i16 %inc8, i16* %options_out_len, align 4 + %idxprom9 = zext i16 %3 to i32 + %arrayidx10 = getelementptr inbounds %struct.b, %struct.b* %2, i32 0, i32 15, i32 %idxprom9 + store i8 %conv4, i8* %arrayidx10, align 1 + ret void +} + +attributes #0 = { minsize optsize "target-cpu"="cortex-m4" }