Index: lib/Target/X86/X86ScheduleSLM.td =================================================================== --- lib/Target/X86/X86ScheduleSLM.td +++ lib/Target/X86/X86ScheduleSLM.td @@ -1,4 +1,4 @@ -//===- X86ScheduleSLM.td - X86 Atom Scheduling Definitions -*- tablegen -*-==// +//=- X86ScheduleSLM.td - X86 Silvermont Scheduling -----------*- tablegen -*-=// // // The LLVM Compiler Infrastructure // @@ -7,662 +7,222 @@ // //===----------------------------------------------------------------------===// // -// This file defines the itinerary class data for the Intel Atom -// (Silvermont) processor. +// This file defines the machine model for Intel Silvermont to support +// instruction scheduling and other instruction cost heuristics. // //===----------------------------------------------------------------------===// -def IEC_RSV0 : FuncUnit; -def IEC_RSV1 : FuncUnit; -def FPC_RSV0 : FuncUnit; -def FPC_RSV1 : FuncUnit; -def MEC_RSV : FuncUnit; - - - - - - - - - - - - - - -def SLMItineraries : ProcessorItineraries< - [ IEC_RSV0, IEC_RSV1, FPC_RSV0, FPC_RSV1, MEC_RSV ], - [], [ - // [InstrStage] - // [InstrStage, InstrStage] - // [InstrStage] - // [InstrStage,InstrStage] - // - // Default is 1 cycle, IEC_RSV0 or IEC_RSV1 - //InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // mul - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - // imul by al, ax, eax, rax - InstrItinData] >, - InstrItinData, - InstrStage<6, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<6, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - // imul reg by reg|mem - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - // imul reg
= reg/mem * imm - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - // idiv - min latency - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // div - min latency - InstrItinData] >, - InstrItinData, - InstrStage<25, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // neg/not/inc/dec - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - // add/sub/and/or/xor/adc/sbc/cmp/test - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - // adc/sbb - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - // shift/rotate - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - // shift double - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - // cmov - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData] >, - // set - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - // jcc - InstrItinData] >, - // jcxz/jecxz/jrcxz - InstrItinData] >, - // jmp rel - InstrItinData] >, - // jmp indirect - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - // jmp far - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - // loop/loope/loopne - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // call - all but reg/imm - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - 
InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - //ret - InstrItinData] >, - InstrItinData] >, - //sign extension movs - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - //zero extension movs - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData] >, - - // SSE binary operations - // arithmetic fp scalar - InstrItinData] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<13, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<13, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData, - InstrStage<6, [MEC_RSV]>] >, - - // arithmetic fp parallel - InstrItinData] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<27, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<27, [MEC_RSV]>] >, - - // bitwise parallel - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - // arithmetic int parallel - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - - // multiply int parallel - InstrItinData] >, - InstrItinData, - InstrStage<5, [MEC_RSV]>] >, - - // shift parallel - InstrItinData] >, - InstrItinData, - InstrStage<2, [MEC_RSV]>] >, - InstrItinData] >, - - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, 
[MEC_RSV]>] >, - - InstrItinData] >, - - InstrItinData] >, - InstrItinData, - InstrStage<26, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<13, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<26, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<13, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData, - InstrStage<9, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - InstrItinData] >, - - InstrItinData] >, - - InstrItinData] >, - InstrItinData, - InstrStage<1, [FPC_RSV0, FPC_RSV1]>] >, - InstrItinData] >, - - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData, - InstrStage<6, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<9, [MEC_RSV]>] >, - InstrItinData, - InstrStage<9, [MEC_RSV]>] >, - InstrItinData, - InstrStage<9, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<5, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - - // conversions - // to/from PD ... 
- InstrItinData] >, - InstrItinData, - InstrStage<5, [MEC_RSV]>] >, - // to/from PS except to/from PD and PS2PI - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - - // MMX MOVs - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // other MMX - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // conversions - // from/to PD - InstrItinData] >, - InstrItinData] >, - // from/to PI - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +def SLMModel : SchedMachineModel { + // All x86 instructions are modeled as a single micro-op, and SLM can decode 2 + // instructions per cycle. + let IssueWidth = 2; + let MicroOpBufferSize = 32; // Based on the reorder buffer. + let LoadLatency = 3; + let MispredictPenalty = 10; + + // FIXME: SSE4 is unimplemented. This flag is set to allow + // the scheduler to assign a default model to unrecognized opcodes. 
+ let CompleteModel = 0; +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +let SchedModel = SLMModel in { + +// Silvermont has 5 reservation stations for micro-ops + +def IEC_RSV0 : ProcResource<1>; +def IEC_RSV1 : ProcResource<1>; +def FPC_RSV0 : ProcResource<1> { let BufferSize = 1; } +def FPC_RSV1 : ProcResource<1> { let BufferSize = 1; } +def MEC_RSV : ProcResource<1>; + +// Many micro-ops are capable of issuing on multiple ports. +def IEC_RSV01 : ProcResGroup<[IEC_RSV0, IEC_RSV1]>; +def FPC_RSV01 : ProcResGroup<[FPC_RSV0, FPC_RSV1]>; + +def SMDivider : ProcResource<1>; +def SMFPMultiplier : ProcResource<1>; +def SMFPDivider : ProcResource<1>; + +// Loads are 3 cycles, so ReadAfterLd registers needn't be available until 3 +// cycles after the memory operand. +def : ReadAdvance; + +// Many SchedWrites are defined in pairs with and without a folded load. +// Instructions with folded loads are usually micro-fused, so they only appear +// as two micro-ops when queued in the reservation station. +// This multiclass defines the resource usage for variants with and without +// folded loads. +multiclass SMWriteResPair { + // Register variant is using a single cycle on ExePort. + def : WriteRes { let Latency = Lat; } + + // Memory variant also uses a cycle on MEC_RSV and adds 3 cycles to the + // latency.
+ def : WriteRes { + let Latency = !add(Lat, 3); + } +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +// A folded store needs a cycle on MEC_RSV for the store data, but it does not +// need an extra port cycle to recompute the address. +def : WriteRes; + +def : WriteRes; +def : WriteRes { let Latency = 3; } +def : WriteRes; +def : WriteRes; + +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; + +// This is for simple LEAs with one or two input operands. +// The complex ones can only execute on port 1, and they require two cycles on +// the port to read all inputs. We don't model that. +def : WriteRes; + +// This is quite rough, latency depends on the dividend. +def : WriteRes { + let Latency = 25; + let ResourceCycles = [1, 25]; +} +def : WriteRes { + let Latency = 29; + let ResourceCycles = [1, 1, 25]; +} - // System instructions - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +// Scalar and vector floating point. 
+defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; + +// This is quite rough, latency depends on precision +def : WriteRes { + let Latency = 5; + let ResourceCycles = [1, 2]; +} +def : WriteRes { + let Latency = 8; + let ResourceCycles = [1, 1, 2]; +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +def : WriteRes { + let Latency = 34; + let ResourceCycles = [1, 34]; +} +def : WriteRes { + let Latency = 37; + let ResourceCycles = [1, 1, 34]; +} - InstrItinData] >, - InstrItinData] >, - // worst case for mov REG_CRx - InstrItinData] >, - InstrItinData] >, +// Vector integer operations. +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; + +// String instructions. +// Packed Compare Implicit Length Strings, Return Mask +def : WriteRes { + let Latency = 13; + let ResourceCycles = [13]; +} +def : WriteRes { + let Latency = 13; + let ResourceCycles = [13, 1]; +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // LAR - InstrItinData] >, - InstrItinData] >, - // LSL - InstrItinData] >, - InstrItinData] >, +// Packed Compare Explicit Length Strings, Return Mask +def : WriteRes { + let Latency = 17; + let ResourceCycles = [17]; +} +def : WriteRes { + let Latency = 17; + let ResourceCycles = [17, 1]; +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // push control register, segment registers - InstrItinData] >, - InstrItinData] >, - // pop control register, segment registers - InstrItinData] >, - InstrItinData] >, - // VERR, VERW - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // WRMSR, RDMSR - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - // SMSW, LMSW - InstrItinData] >, - 
InstrItinData] >, - InstrItinData] >, +// Packed Compare Implicit Length Strings, Return Index +def : WriteRes { + let Latency = 17; + let ResourceCycles = [17]; +} +def : WriteRes { + let Latency = 17; + let ResourceCycles = [17, 1]; +} - InstrItinData] >, - InstrItinData] >, +// Packed Compare Explicit Length Strings, Return Index +def : WriteRes { + let Latency = 21; + let ResourceCycles = [21]; +} +def : WriteRes { + let Latency = 21; + let ResourceCycles = [21, 1]; +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +// AES Instructions. +def : WriteRes { + let Latency = 8; + let ResourceCycles = [5]; +} +def : WriteRes { + let Latency = 8; + let ResourceCycles = [5, 1]; +} - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, +def : WriteRes { + let Latency = 8; + let ResourceCycles = [5]; +} +def : WriteRes { + let Latency = 8; + let ResourceCycles = [5, 1]; +} - InstrItinData] >, - InstrItinData, - InstrStage<10, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<5, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<5, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<6, [MEC_RSV]>] >, - InstrItinData, - InstrStage<6, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, 
- InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData, - InstrStage<10, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<3, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<12, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<15, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<1, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<11, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<5, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<4, [MEC_RSV]>] >, - InstrItinData] >, - InstrItinData, - InstrStage<10, [MEC_RSV]>] >, +def : WriteRes { + let Latency = 8; + let ResourceCycles = [5]; +} +def : WriteRes { + let Latency = 8; + let ResourceCycles = [5, 1]; +} - InstrItinData] > - ]>; +// Carry-less multiplication instructions. +def : WriteRes { + let Latency = 10; + let ResourceCycles = [10]; +} +def : WriteRes { + let Latency = 10; + let ResourceCycles = [10, 1]; +} -// Silvermont machine model. -def SLMModel : SchedMachineModel { - let IssueWidth = 2; // Allows 2 instructions per scheduling group. - let MinLatency = 1; // InstrStage cycles overrides MinLatency. - // OperandCycles may be used for expected latency. - let LoadLatency = 3; // Expected cycles, may be overriden by OperandCycles. - let HighLatency = 30;// Expected, may be overriden by OperandCycles. 
- let Itineraries = SLMItineraries; -} +def : WriteRes { let Latency = 100; } +def : WriteRes { let Latency = 100; } +def : WriteRes; +def : WriteRes; + +// AVX is not supported on that architecture, but we should define the basic +// scheduling resources anyway. +def : WriteRes; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +defm : SMWriteResPair; +} // SchedModel