Index: lib/Target/ARM/ARM.td =================================================================== --- lib/Target/ARM/ARM.td +++ lib/Target/ARM/ARM.td @@ -977,21 +977,27 @@ FeatureHasSlowFPVMLx, FeatureAvoidPartialCPSR]>; -def : ProcessorModel<"cortex-m3", CortexM3Model, [ARMv7m, +def : ProcessorModel<"cortex-m3", CortexM4Model, [ARMv7m, ProcM3, FeaturePrefLoopAlign32, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; -def : ProcessorModel<"sc300", CortexM3Model, [ARMv7m, +def : ProcessorModel<"sc300", CortexM4Model, [ARMv7m, ProcM3, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; -def : ProcessorModel<"cortex-m4", CortexM3Model, [ARMv7em, +def : ProcessorModel<"cortex-m4", CortexM4Model, [ARMv7em, FeatureVFP4, FeatureVFPOnlySP, FeatureD16, FeaturePrefLoopAlign32, FeatureHasSlowFPVMLx, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; def : ProcNoItin<"cortex-m7", [ARMv7em, @@ -1001,13 +1007,15 @@ def : ProcNoItin<"cortex-m23", [ARMv8mBaseline, FeatureNoMovt]>; -def : ProcessorModel<"cortex-m33", CortexM3Model, [ARMv8mMainline, +def : ProcessorModel<"cortex-m33", CortexM4Model, [ARMv8mMainline, FeatureDSP, FeatureFPARMv8, FeatureD16, FeatureVFPOnlySP, FeaturePrefLoopAlign32, FeatureHasSlowFPVMLx, + FeatureUseMISched, + FeatureUseAA, FeatureHasNoBranchPredictor]>; def : ProcNoItin<"cortex-a32", [ARMv8a, Index: lib/Target/ARM/ARMInstrThumb.td =================================================================== --- lib/Target/ARM/ARMInstrThumb.td +++ lib/Target/ARM/ARMInstrThumb.td @@ -651,7 +651,7 @@ def tLDRpci : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_pc:$addr), IIC_iLoad_i, "ldr", "\t$Rt, $addr", [(set tGPR:$Rt, (load (ARMWrapper tconstpool:$addr)))]>, - T1Encoding<{0,1,0,0,1,?}> { + T1Encoding<{0,1,0,0,1,?}>, Sched<[WriteLd]> { // A6.2 & A8.6.59 bits<3> Rt; bits<8> addr; @@ -665,7 +665,7 @@ def tLDRspi : T1pIs<(outs tGPR:$Rt), (ins t_addrmode_sp:$addr), IIC_iLoad_i, "ldr", "\t$Rt, $addr", [(set tGPR:$Rt, (load 
t_addrmode_sp:$addr))]>, - T1LdStSP<{1,?,?}> { + T1LdStSP<{1,?,?}>, Sched<[WriteLd]> { bits<3> Rt; bits<8> addr; let Inst{10-8} = Rt; @@ -716,39 +716,41 @@ defm tLDR : thumb_ld_rr_ri_enc<0b100, 0b0110, t_addrmode_rr, t_addrmode_is4, AddrModeT1_4, IIC_iLoad_r, IIC_iLoad_i, "ldr", - load>; + load>, Sched<[WriteLd]>; // A8.6.64 & A8.6.61 defm tLDRB : thumb_ld_rr_ri_enc<0b110, 0b0111, t_addrmode_rr, t_addrmode_is1, AddrModeT1_1, IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrb", - zextloadi8>; + zextloadi8>, Sched<[WriteLd]>; // A8.6.76 & A8.6.73 defm tLDRH : thumb_ld_rr_ri_enc<0b101, 0b1000, t_addrmode_rr, t_addrmode_is2, AddrModeT1_2, IIC_iLoad_bh_r, IIC_iLoad_bh_i, "ldrh", - zextloadi16>; + zextloadi16>, Sched<[WriteLd]>; let AddedComplexity = 10 in def tLDRSB : // A8.6.80 T1pILdStEncode<0b011, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr), AddrModeT1_1, IIC_iLoad_bh_r, "ldrsb", "\t$Rt, $addr", - [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr:$addr))]>; + [(set tGPR:$Rt, (sextloadi8 t_addrmode_rr:$addr))]>, + Sched<[WriteLd]>; let AddedComplexity = 10 in def tLDRSH : // A8.6.84 T1pILdStEncode<0b111, (outs tGPR:$Rt), (ins t_addrmode_rr:$addr), AddrModeT1_2, IIC_iLoad_bh_r, "ldrsh", "\t$Rt, $addr", - [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>; + [(set tGPR:$Rt, (sextloadi16 t_addrmode_rr:$addr))]>, + Sched<[WriteLd]>; def tSTRspi : T1pIs<(outs), (ins tGPR:$Rt, t_addrmode_sp:$addr), IIC_iStore_i, "str", "\t$Rt, $addr", [(store tGPR:$Rt, t_addrmode_sp:$addr)]>, - T1LdStSP<{0,?,?}> { + T1LdStSP<{0,?,?}>, Sched<[WriteST]> { bits<3> Rt; bits<8> addr; let Inst{10-8} = Rt; @@ -759,19 +761,19 @@ defm tSTR : thumb_st_rr_ri_enc<0b000, 0b0110, t_addrmode_rr, t_addrmode_is4, AddrModeT1_4, IIC_iStore_r, IIC_iStore_i, "str", - store>; + store>, Sched<[WriteST]>; // A8.6.197 & A8.6.195 defm tSTRB : thumb_st_rr_ri_enc<0b010, 0b0111, t_addrmode_rr, t_addrmode_is1, AddrModeT1_1, IIC_iStore_bh_r, IIC_iStore_bh_i, "strb", - truncstorei8>; + truncstorei8>, Sched<[WriteST]>; // A8.6.207 & A8.6.205 
defm tSTRH : thumb_st_rr_ri_enc<0b001, 0b1000, t_addrmode_rr, t_addrmode_is2, AddrModeT1_2, IIC_iStore_bh_r, IIC_iStore_bh_i, "strh", - truncstorei16>; + truncstorei16>, Sched<[WriteST]>; //===----------------------------------------------------------------------===// @@ -830,7 +832,7 @@ def tPOP : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), IIC_iPop, "pop${p}\t$regs", []>, - T1Misc<{1,1,0,?,?,?,?}> { + T1Misc<{1,1,0,?,?,?,?}>, Sched<[WriteLd]> { bits<16> regs; let Inst{8} = regs{15}; let Inst{7-0} = regs{7-0}; @@ -840,7 +842,7 @@ def tPUSH : T1I<(outs), (ins pred:$p, reglist:$regs, variable_ops), IIC_iStore_m, "push${p}\t$regs", []>, - T1Misc<{0,1,0,?,?,?,?}> { + T1Misc<{0,1,0,?,?,?,?}>, Sched<[WriteST]> { bits<16> regs; let Inst{8} = regs{14}; let Inst{7-0} = regs{7-0}; @@ -1201,7 +1203,7 @@ Thumb1sI<(outs tGPR:$Rd), (ins tGPR:$Rn, tGPR:$Rm), AddrModeNone, 2, IIC_iMUL32, "mul", "\t$Rd, $Rn, $Rm", "$Rm = $Rd", [(set tGPR:$Rd, (mul tGPR:$Rn, tGPR:$Rm))]>, - T1DataProcessing<0b1101> { + T1DataProcessing<0b1101>, Sched<[WriteMUL32, ReadMUL, ReadMUL]> { bits<3> Rd; bits<3> Rn; let Inst{5-3} = Rn; Index: lib/Target/ARM/ARMInstrThumb2.td =================================================================== --- lib/Target/ARM/ARMInstrThumb2.td +++ lib/Target/ARM/ARMInstrThumb2.td @@ -1334,7 +1334,8 @@ def t2LDRB_POST : T2Ipostldst<0, 0b00, 1, 0, (outs GPR:$Rt, GPR:$Rn_wb), (ins addr_offset_none:$Rn, t2am_imm8_offset:$offset), AddrModeT2_i8, IndexModePost, IIC_iLoad_bh_iu, - "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>; + "ldrb", "\t$Rt, $Rn$offset", "$Rn = $Rn_wb", []>, + Sched<[WriteLd]>; def t2LDRH_PRE : T2Ipreldst<0, 0b01, 1, 1, (outs GPR:$Rt, GPR:$Rn_wb), (ins t2addrmode_imm8_pre:$addr), @@ -2321,14 +2322,14 @@ def t2SSAT: T2SatI<(ins imm1_32:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), "ssat", "\t$Rd, $sat_imm, $Rn$sh">, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteALU]> { let Inst{23-22} = 0b00; let Inst{5} = 0; } def t2SSAT16: T2SatI<(ins 
imm1_16:$sat_imm, rGPR:$Rn), "ssat16", "\t$Rd, $sat_imm, $Rn">, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, Sched<[WriteALU]> { let Inst{23-22} = 0b00; let sh = 0b100000; let Inst{4} = 0; @@ -2336,13 +2337,13 @@ def t2USAT: T2SatI<(ins imm0_31:$sat_imm, rGPR:$Rn, t2_shift_imm:$sh), "usat", "\t$Rd, $sat_imm, $Rn$sh">, - Requires<[IsThumb2]> { + Requires<[IsThumb2]>, Sched<[WriteALU]> { let Inst{23-22} = 0b10; } def t2USAT16: T2SatI<(ins imm0_15:$sat_imm, rGPR:$Rn), "usat16", "\t$Rd, $sat_imm, $Rn">, - Requires<[IsThumb2, HasDSP]> { + Requires<[IsThumb2, HasDSP]>, Sched<[WriteALU]> { let Inst{23-22} = 0b10; let sh = 0b100000; let Inst{4} = 0; @@ -2466,7 +2467,7 @@ let Constraints = "$src = $Rd" in def t2BFC : T2BitFI<(outs rGPR:$Rd), (ins rGPR:$src, bf_inv_mask_imm:$imm), IIC_iUNAsi, "bfc", "\t$Rd, $imm", - [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]> { + [(set rGPR:$Rd, (and rGPR:$src, bf_inv_mask_imm:$imm))]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; // should be 0. let Inst{25} = 1; @@ -2482,7 +2483,7 @@ def t2SBFX: T2TwoRegBitFI< (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb), - IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []> { + IIC_iUNAsi, "sbfx", "\t$Rd, $Rn, $lsb, $msb", []>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b10100; @@ -2491,7 +2492,7 @@ def t2UBFX: T2TwoRegBitFI< (outs rGPR:$Rd), (ins rGPR:$Rn, imm0_31:$lsb, imm1_32:$msb), - IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []> { + IIC_iUNAsi, "ubfx", "\t$Rd, $Rn, $lsb, $msb", []>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{25} = 1; let Inst{24-20} = 0b11100; @@ -2517,7 +2518,7 @@ (ins rGPR:$src, rGPR:$Rn, bf_inv_mask_imm:$imm), IIC_iBITi, "bfi", "\t$Rd, $Rn, $imm", [(set rGPR:$Rd, (ARMbfi rGPR:$src, rGPR:$Rn, - bf_inv_mask_imm:$imm))]> { + bf_inv_mask_imm:$imm))]>, Sched<[WriteALU]> { let Inst{31-27} = 0b11110; let Inst{26} = 0; // should be 0. 
let Inst{25} = 1; @@ -3276,17 +3277,17 @@ AddrModeNone, 4, NoItinerary, "ldrexb", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]>; def t2LDREXH : T2I_ldrex<0b0101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldrexh", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]>; def t2LDREX : Thumb2I<(outs rGPR:$Rt), (ins t2addrmode_imm0_1020s4:$addr), AddrModeT2_ldrex, 4, NoItinerary, "ldrex", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldrex_4 t2addrmode_imm0_1020s4:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]> { + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteLd]> { bits<4> Rt; bits<12> addr; let Inst{31-27} = 0b11101; @@ -3302,7 +3303,7 @@ AddrModeNone, 4, NoItinerary, "ldrexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, - Requires<[IsThumb2, IsNotMClass]> { + Requires<[IsThumb2, IsNotMClass]>, Sched<[WriteLd]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3310,17 +3311,17 @@ AddrModeNone, 4, NoItinerary, "ldaexb", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_1 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]>; def t2LDAEXH : T2I_ldrex<0b1101, (outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaexh", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_2 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>; + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, Sched<[WriteLd]>; def t2LDAEX : Thumb2I<(outs rGPR:$Rt), (ins addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "ldaex", "\t$Rt, $addr", "", [(set rGPR:$Rt, (ldaex_4 addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> { + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, 
Sched<[WriteLd]> { bits<4> Rt; bits<4> addr; let Inst{31-27} = 0b11101; @@ -3336,7 +3337,7 @@ AddrModeNone, 4, NoItinerary, "ldaexd", "\t$Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, Requires<[IsThumb, - HasAcquireRelease, HasV7Clrex, IsNotMClass]> { + HasAcquireRelease, HasV7Clrex, IsNotMClass]>, Sched<[WriteLd]> { bits<4> Rt2; let Inst{11-8} = Rt2; @@ -3351,14 +3352,14 @@ "strexb", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_1 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]>; def t2STREXH : T2I_strex<0b0101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), AddrModeNone, 4, NoItinerary, "strexh", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_2 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]>; + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]>; def t2STREX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, t2addrmode_imm0_1020s4:$addr), @@ -3366,7 +3367,7 @@ "strex", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (strex_4 rGPR:$Rt, t2addrmode_imm0_1020s4:$addr))]>, - Requires<[IsThumb, HasV8MBaseline]> { + Requires<[IsThumb, HasV8MBaseline]>, Sched<[WriteST]> { bits<4> Rd; bits<4> Rt; bits<12> addr; @@ -3383,7 +3384,7 @@ AddrModeNone, 4, NoItinerary, "strexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, - Requires<[IsThumb2, IsNotMClass]> { + Requires<[IsThumb2, IsNotMClass]>, Sched<[WriteST]> { bits<4> Rt2; let Inst{11-8} = Rt2; } @@ -3394,7 +3395,7 @@ [(set rGPR:$Rd, (stlex_1 rGPR:$Rt, addr_offset_none:$addr))]>, Requires<[IsThumb, HasAcquireRelease, - HasV7Clrex]>; + HasV7Clrex]>, Sched<[WriteST]>; def t2STLEXH : T2I_strex<0b1101, (outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), @@ -3403,7 +3404,7 @@ [(set rGPR:$Rd, (stlex_2 rGPR:$Rt, addr_offset_none:$addr))]>, Requires<[IsThumb, HasAcquireRelease, - HasV7Clrex]>; + HasV7Clrex]>, Sched<[WriteST]>; def t2STLEX : Thumb2I<(outs rGPR:$Rd), (ins rGPR:$Rt, addr_offset_none:$addr), @@ -3411,7 
+3412,8 @@ "stlex", "\t$Rd, $Rt, $addr", "", [(set rGPR:$Rd, (stlex_4 rGPR:$Rt, addr_offset_none:$addr))]>, - Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]> { + Requires<[IsThumb, HasAcquireRelease, HasV7Clrex]>, + Sched<[WriteST]> { bits<4> Rd; bits<4> Rt; bits<4> addr; @@ -3428,7 +3430,7 @@ AddrModeNone, 4, NoItinerary, "stlexd", "\t$Rd, $Rt, $Rt2, $addr", "", [], {?, ?, ?, ?}>, Requires<[IsThumb, HasAcquireRelease, - HasV7Clrex, IsNotMClass]> { + HasV7Clrex, IsNotMClass]>, Sched<[WriteST]> { bits<4> Rt2; let Inst{11-8} = Rt2; } Index: lib/Target/ARM/ARMSchedule.td =================================================================== --- lib/Target/ARM/ARMSchedule.td +++ lib/Target/ARM/ARMSchedule.td @@ -425,4 +425,4 @@ include "ARMScheduleSwift.td" include "ARMScheduleR52.td" include "ARMScheduleA57.td" -include "ARMScheduleM3.td" +include "ARMScheduleM4.td" Index: lib/Target/ARM/ARMScheduleM3.td =================================================================== --- lib/Target/ARM/ARMScheduleM3.td +++ /dev/null @@ -1,21 +0,0 @@ -//=- ARMScheduleM3.td - ARM Cortex-M3 Scheduling Definitions -*- tablegen -*-=// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the machine model for the ARM Cortex-M3 processor. 
-// -//===----------------------------------------------------------------------===// - -def CortexM3Model : SchedMachineModel { - let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue - let MicroOpBufferSize = 0; // In-order - let LoadLatency = 2; // Latency when not pipelined, not pc-relative - let MispredictPenalty = 2; // Best case branch taken cost - - let CompleteModel = 0; -} Index: lib/Target/ARM/ARMScheduleM4.td =================================================================== --- /dev/null +++ lib/Target/ARM/ARMScheduleM4.td @@ -0,0 +1,120 @@ +//=- ARMScheduleM4.td - ARM Cortex-M4 Scheduling Definitions -*- tablegen -*-=// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the machine model for the ARM Cortex-M4 processor. +// +//===----------------------------------------------------------------------===// + +def CortexM4Model : SchedMachineModel { + let IssueWidth = 1; // Only IT can be dual-issued, so assume single-issue + let MicroOpBufferSize = 0; // In-order + let LoadLatency = 2; // Latency when not pipelined, not pc-relative + let MispredictPenalty = 2; // Best case branch taken cost + let PostRAScheduler = 1; + + let CompleteModel = 0; +} + + +// We model the entire cpu as a single pipeline with a BufferSize = 0 since +// Cortex-M4 is in-order. 
+ +def M4Unit : ProcResource<1> { let BufferSize = 0; } + + +let SchedModel = CortexM4Model in { + +// Some definitions of latencies we apply to different instructions + +class M4UnitL1<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 1; } +class M4UnitL2<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 2; } +class M4UnitL3<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 3; } +class M4UnitL14<SchedWrite write> : WriteRes<write, [M4Unit]> { let Latency = 14; } +def M4UnitL1_wr : SchedWriteRes<[M4Unit]> { let Latency = 1; } +def M4UnitL2_wr : SchedWriteRes<[M4Unit]> { let Latency = 2; } +class M4UnitL1I<dag instr> : InstRW<[M4UnitL1_wr], instr>; +class M4UnitL2I<dag instr> : InstRW<[M4UnitL2_wr], instr>; + + +// Loads, MAC's and DIV all get a higher latency of 2 +def : M4UnitL2<WriteLd>; +def : M4UnitL2<WriteMAC32>; +def : M4UnitL2<WriteMAC64Hi>; +def : M4UnitL2<WriteMAC64Lo>; +def : M4UnitL2<WriteMAC16>; +def : M4UnitL2<WriteDIV>; + +def : M4UnitL2I<(instregex "(t|t2)LDM")>; + + +// Stores we use a latency of 1 as they have no outputs + +def : M4UnitL1<WriteST>; +def : M4UnitL1I<(instregex "(t|t2)STM")>; + + +// Everything else has a Latency of 1 + +def : M4UnitL1<WriteALU>; +def : M4UnitL1<WriteALUsi>; +def : M4UnitL1<WriteALUsr>; +def : M4UnitL1<WriteALUSsr>; +def : M4UnitL1<WriteBr>; +def : M4UnitL1<WriteBrL>; +def : M4UnitL1<WriteBrTbl>; +def : M4UnitL1<WriteCMPsi>; +def : M4UnitL1<WriteCMPsr>; +def : M4UnitL1<WriteCMP>; +def : M4UnitL1<WriteMUL32>; +def : M4UnitL1<WriteMUL64Hi>; +def : M4UnitL1<WriteMUL64Lo>; +def : M4UnitL1<WriteMUL16>; +def : M4UnitL1<WriteNoop>; +def : M4UnitL1<WritePreLd>; +def : M4UnitL1I<(instregex "(t|t2)MOV")>; +def : M4UnitL1I<(instrs COPY)>; +def : M4UnitL1I<(instregex "t2IT")>; +def : M4UnitL1I<(instregex "t2SEL", "t2USAD8", + "t2(S|Q|SH|U|UQ|UH)(ADD16|ASX|SAX|SUB16|ADD8|SUB8)", "t2USADA8", "(t|t2)REV")>; + +def : ReadAdvance<ReadALU, 0>; +def : ReadAdvance<ReadALUsr, 0>; +def : ReadAdvance<ReadMUL, 0>; +def : ReadAdvance<ReadMAC, 0>; + +// Most FP instructions are single-cycle latency, except MAC's, Div's and Sqrt's. +// Loads still take 2 cycles. 
+ +def : M4UnitL1<WriteFPCVT>; +def : M4UnitL1<WriteFPMOV>; +def : M4UnitL1<WriteFPALU32>; +def : M4UnitL1<WriteFPALU64>; +def : M4UnitL1<WriteFPMUL32>; +def : M4UnitL1<WriteFPMUL64>; +def : M4UnitL2I<(instregex "VLD")>; +def : M4UnitL1I<(instregex "VST")>; +def : M4UnitL3<WriteFPMAC32>; +def : M4UnitL3<WriteFPMAC64>; +def : M4UnitL14<WriteFPDIV32>; +def : M4UnitL14<WriteFPDIV64>; +def : M4UnitL14<WriteFPSQRT32>; +def : M4UnitL14<WriteFPSQRT64>; +def : M4UnitL1<WriteVLD1>; +def : M4UnitL1<WriteVLD2>; +def : M4UnitL1<WriteVLD3>; +def : M4UnitL1<WriteVLD4>; +def : M4UnitL1<WriteVST1>; +def : M4UnitL1<WriteVST2>; +def : M4UnitL1<WriteVST3>; +def : M4UnitL1<WriteVST4>; + +def : ReadAdvance<ReadFPMUL, 0>; +def : ReadAdvance<ReadFPMAC, 0>; + +} Index: test/CodeGen/ARM/aapcs-hfa-code.ll =================================================================== --- test/CodeGen/ARM/aapcs-hfa-code.ll +++ test/CodeGen/ARM/aapcs-hfa-code.ll @@ -76,8 +76,8 @@ ; CHECK-M4F-LABEL: test_1double_nosplit: ; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 -; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: movt [[ONEHI]], #16368 +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp] ; CHECK-M4F: bl test_1double_nosplit call arm_aapcs_vfpcc void @test_1double_nosplit([4 x float] undef, [4 x double] undef, [3 x float] undef, double 1.0) @@ -97,8 +97,8 @@ ; CHECK-M4F-LABEL: test_1double_misaligned: ; CHECK-M4F: movs [[ONEHI:r[0-9]+]], #0 -; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: movt [[ONEHI]], #16368 +; CHECK-M4F: movs [[ONELO:r[0-9]+]], #0 ; CHECK-M4F: strd [[ONELO]], [[ONEHI]], [sp, #8] ; CHECK-M4F: bl test_1double_misaligned Index: test/CodeGen/ARM/useaa.ll =================================================================== --- test/CodeGen/ARM/useaa.ll +++ test/CodeGen/ARM/useaa.ll @@ -1,4 +1,6 @@ ; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=cortex-r52 | FileCheck %s --check-prefix=CHECK --check-prefix=USEAA +; RUN: llc < %s -mtriple=armv7m-eabi -mcpu=cortex-m4 | FileCheck %s --check-prefix=CHECK --check-prefix=USEAA +; RUN: llc < %s -mtriple=armv8m-eabi -mcpu=cortex-m33 | FileCheck %s --check-prefix=CHECK --check-prefix=USEAA ; RUN: llc < %s -mtriple=armv8r-eabi -mcpu=generic | FileCheck %s --check-prefix=CHECK 
--check-prefix=GENERIC ; Check we use AA during codegen, so can interleave these loads/stores. Index: test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll =================================================================== --- test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll +++ test/CodeGen/Thumb2/ifcvt-no-branch-predictor.ll @@ -100,10 +100,10 @@ ; CHECK-BP: str ; CHECK-BP: b ; CHECK-BP: str -; CHECK-BP: ldr +; CHECK-BP: add ; CHECK-NOBP: ittee ; CHECK-NOBP: streq -; CHECK-NOBP: ldreq +; CHECK-NOBP: addeq ; CHECK-NOBP: strne ; CHECK-NOBP: strne define i32 @diamond2(i32 %n, i32* %p, i32* %q) { @@ -119,7 +119,7 @@ if.else: store i32 %n, i32* %q, align 4 - %0 = load i32, i32* %p, align 4 + %0 = add i32 %n, 10 br label %if.end if.end: Index: test/CodeGen/Thumb2/m4-sched-ldr.mir =================================================================== --- /dev/null +++ test/CodeGen/Thumb2/m4-sched-ldr.mir @@ -0,0 +1,60 @@ +# RUN: llc %s -run-pass machine-scheduler -o - | FileCheck %s + +# CHECK-LABEL: bb.0. +# CHECK: t2LDRi12 +# CHECK-NEXT: t2LDRi12 +# CHECK-NEXT: t2ADDri +# CHECK-NEXT: t2ADDri +--- | + target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv7em-arm-none-eabi" + + ; Function Attrs: norecurse nounwind optsize readonly + define dso_local i32 @test(i32* nocapture readonly %a, i32* nocapture readonly %b) local_unnamed_addr #0 { + entry: + %0 = load i32, i32* %a, align 4 + %add = add nsw i32 %0, 10 + %1 = load i32, i32* %b, align 4 + %add1 = add nsw i32 %1, 20 + %mul = mul nsw i32 %add1, %add + ret i32 %mul + } + + attributes #0 = { "target-cpu"="cortex-m4" } + +... 
+--- +name: test +alignment: 1 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +registers: + - { id: 0, class: gpr, preferred-register: '' } + - { id: 1, class: gpr, preferred-register: '' } + - { id: 2, class: gprnopc, preferred-register: '' } + - { id: 3, class: rgpr, preferred-register: '' } + - { id: 4, class: gprnopc, preferred-register: '' } + - { id: 5, class: rgpr, preferred-register: '' } + - { id: 6, class: rgpr, preferred-register: '' } +liveins: + - { reg: '$r0', virtual-reg: '%0' } + - { reg: '$r1', virtual-reg: '%1' } +body: | + bb.0.entry: + liveins: $r0, $r1 + + %1:gpr = COPY $r1 + %0:gpr = COPY $r0 + %2:gprnopc = t2LDRi12 %0, 0, 14, $noreg :: (load 4 from %ir.a) + %3:rgpr = nsw t2ADDri %2, 10, 14, $noreg, $noreg + %4:gprnopc = t2LDRi12 %1, 0, 14, $noreg :: (load 4 from %ir.b) + %5:rgpr = nsw t2ADDri %4, 20, 14, $noreg, $noreg + %6:rgpr = nsw t2MUL %5, %3, 14, $noreg + $r0 = COPY %6 + tBX_RET 14, $noreg, implicit $r0 + +...