Index: lib/Target/PowerPC/PPCSchedule.td
===================================================================
--- lib/Target/PowerPC/PPCSchedule.td
+++ lib/Target/PowerPC/PPCSchedule.td
@@ -119,6 +119,7 @@
 include "PPCScheduleG4Plus.td"
 include "PPCScheduleG5.td"
 include "PPCScheduleP7.td"
+include "PPCScheduleP8.td"
 include "PPCScheduleA2.td"
 include "PPCScheduleE500mc.td"
 include "PPCScheduleE5500.td"
Index: lib/Target/PowerPC/PPCScheduleP8.td
===================================================================
--- lib/Target/PowerPC/PPCScheduleP8.td
+++ lib/Target/PowerPC/PPCScheduleP8.td
@@ -0,0 +1,371 @@
+//===-- PPCScheduleP8.td - PPC P8 Scheduling Definitions ---*- tablegen -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the itinerary class data for the POWER8 processor.
+//
+//===----------------------------------------------------------------------===//
+
+// Scheduling for the P8 involves tracking two types of resources:
+//  1. The dispatch bundle slots
+//  2. The functional unit resources
+
+// Dispatch units:
+def P8_DU1    : FuncUnit;
+def P8_DU2    : FuncUnit;
+def P8_DU3    : FuncUnit;
+def P8_DU4    : FuncUnit;
+def P8_DU5    : FuncUnit;
+def P8_DU6    : FuncUnit;
+def P8_DU7    : FuncUnit;
+def P8_DU8    : FuncUnit;
+
+def P8_LS1    : FuncUnit; // Load/Store pipeline 1
+def P8_LS2    : FuncUnit; // Load/Store pipeline 2
+
+def P8_FX1    : FuncUnit; // FX pipeline 1
+def P8_FX2    : FuncUnit; // FX pipeline 2
+
+// VS pipeline 1 (vector integer operations always go here)
+def P8_VS1    : FuncUnit; // VS pipeline 1
+// VS pipeline 2 (128-bit stores and permutes go here)
+def P8_VS2    : FuncUnit; // VS pipeline 2
+
+def P8_CRU    : FuncUnit; // CR unit (CR logicals and move-from-SPRs)
+def P8_BRU    : FuncUnit; // BR unit
+
+def P8Itineraries : ProcessorItineraries<  // NOTE(review): args look like (functional units, bypasses = [], itinerary data) - confirm against TargetItinerary.td
+  [P8_DU1, P8_DU2, P8_DU3, P8_DU4, P8_DU5, P8_DU6, P8_DU7, P8_DU8,
+   P8_LS1, P8_LS2, P8_FX1, P8_FX2, P8_VS1, P8_VS2, P8_CRU, P8_BRU], [], [
+  InstrItinData,  // NOTE(review): the template argument text after InstrItinData appears to be missing here and in every entry below - restore from the original patch
+                                   InstrStage<1, [P8_FX1, P8_FX2,
+                                                  P8_LS1, P8_LS2]>],
+                                  [1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,  // NOTE(review): trailing 0 is presumably the stage time increment (next stage starts in the same cycle) - confirm
+                                   InstrStage<36, [P8_FX1, P8_FX2]>],
+                                  [36, 1, 1]>,  // presumably operand cycles; the first value matches the 36-cycle FX stage above
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<68, [P8_FX1, P8_FX2]>],
+                                  [68, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [4, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [4, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [4, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_BRU]>],
+                                  [3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_CRU]>],
+                                  [3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_BRU]>],
+                                  [3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_BRU]>],
+                                  [3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [2, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [2, 2, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [2, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [2, 2, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [4, 4, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [4, 4, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [3, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [2, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [2, 1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [2, 1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_FX1, P8_FX2], 0>,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [2, 1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_LS1, P8_LS2], 0>,
+                                   InstrStage<1, [P8_VS2]>],
+                                  [1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_LS1, P8_LS2]>],
+                                  [1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_DU2], 0>,
+                                   InstrStage<1, [P8_DU3], 0>,
+                                   InstrStage<1, [P8_DU4], 0>,
+                                   InstrStage<1, [P8_CRU]>,
+                                   InstrStage<1, [P8_FX1, P8_FX2]>],
+                                  [3, 1]>, // mtcr
+  InstrItinData,
+                                   InstrStage<1, [P8_CRU]>],
+                                  [6, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_CRU]>],
+                                  [3, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_FX1]>],
+                                  [4, 1]>, // mtctr
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [5, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [8, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [33, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [27, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [44, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [32, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [5, 1, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [5, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1]>],
+                                  [2, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1]>],
+                                  [2, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1]>],
+                                  [2, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [6, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [6, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1, P8_VS2]>],
+                                  [6, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS1]>],
+                                  [7, 1, 1]>,
+  InstrItinData,
+                                   InstrStage<1, [P8_VS2]>],
+                                  [3, 1, 1]>
+]>;
+
+// ===---------------------------------------------------------------------===//
+// P8 machine model for scheduling and other instruction cost heuristics.
+
+def P8Model : SchedMachineModel {
+  let IssueWidth = 8;  // up to 8 instructions dispatched per cycle.
+                       // up to six non-branch instructions.
+                       // up to two branches in a dispatch group.
+
+  let MinLatency = 0;  // Out-of-order dispatch.
+  let LoadLatency = 3; // Optimistic load latency assuming bypass.
+                       // This is overridden by OperandCycles if the
+                       // Itineraries are queried instead.
+  let MispredictPenalty = 16;
+
+  let Itineraries = P8Itineraries;
+}
+