|
| 1 | +//==- AArch64SchedKryo.td - Qualcomm Kryo Scheduling Defs ---*- tablegen -*-==// |
| 2 | +// |
| 3 | +// The LLVM Compiler Infrastructure |
| 4 | +// |
| 5 | +// This file is distributed under the University of Illinois Open Source |
| 6 | +// License. See LICENSE.TXT for details. |
| 7 | +// |
| 8 | +//===----------------------------------------------------------------------===// |
| 9 | +// |
| 10 | +// This file defines the machine model for Qualcomm Kryo to support |
| 11 | +// instruction scheduling and other instruction cost heuristics. |
| 12 | +// |
| 13 | +//===----------------------------------------------------------------------===// |
| 14 | + |
| 15 | +//===----------------------------------------------------------------------===// |
| 16 | +// The issue width is set to five, matching the five issue queues for expanded |
| 17 | +// uops. The latency spreadsheet reports information based on fragmented uops, |
| 18 | +// but fragments do not actually take up an issue queue. |
| 19 | + |
| 20 | +def KryoModel : SchedMachineModel { |
| 21 | + let IssueWidth = 5; // 5-wide issue: one slot per issue queue for expanded uops |
| 22 | + let MicroOpBufferSize = 128; // Out-of-order with temporary unified issue buffer |
| 23 | + let LoadLatency = 4; // Optimistic load latency |
| 24 | + let MispredictPenalty = 14; // Fetch + Decode/Rename/Dispatch + Branch |
| 25 | + |
| 26 | + // Enable partial and runtime loop unrolling. The value 16 was chosen based on |
| 27 | + // experiments and benchmarking data. |
| 28 | + let LoopMicroOpBufferSize = 16; |
| 29 | +} |
| 30 | + |
| 31 | +//===----------------------------------------------------------------------===// |
| 32 | +// Define each kind of processor resource and number available on Kryo. |
| 33 | + |
| 34 | +let SchedModel = KryoModel in { |
| 35 | + def KryoUnitXA : ProcResource<1>; // Type X(A) micro-ops |
| 36 | + def KryoUnitXB : ProcResource<1>; // Type X(B) micro-ops |
| 37 | + def KryoUnitYA : ProcResource<1>; // Type Y(A) micro-ops |
| 38 | + def KryoUnitYB : ProcResource<1>; // Type Y(B) micro-ops |
| 39 | + def KryoUnitX : ProcResGroup<[KryoUnitXA, // Type X micro-ops: XA or XB |
| 40 | + KryoUnitXB]>; |
| 41 | + def KryoUnitY : ProcResGroup<[KryoUnitYA, // Type Y micro-ops: YA or YB |
| 42 | + KryoUnitYB]>; |
| 43 | + def KryoUnitXY : ProcResGroup<[KryoUnitXA, // Type XY micro-ops: any X or Y unit |
| 44 | + KryoUnitXB, |
| 45 | + KryoUnitYA, |
| 46 | + KryoUnitYB]>; |
| 47 | + def KryoUnitLSA : ProcResource<1>; // Type LS(A) micro-ops |
| 48 | + def KryoUnitLSB : ProcResource<1>; // Type LS(B) micro-ops |
| 49 | + def KryoUnitLS : ProcResGroup<[KryoUnitLSA, // Type LS micro-ops: LSA or LSB |
| 50 | + KryoUnitLSB]>; |
| 51 | +} |
| 52 | + |
| 53 | +let SchedModel = KryoModel in { |
| 54 | + |
| 55 | +//===----------------------------------------------------------------------===// |
| 56 | +// Map the target-defined scheduler read/write resources and latency for |
| 57 | +// Kryo. |
| 58 | + |
| 59 | +def : WriteRes<WriteImm, [KryoUnitXY]> { let Latency = 1; } |
| 60 | +def : WriteRes<WriteI, [KryoUnitXY]> { let Latency = 1; } |
| 61 | +def : WriteRes<WriteISReg, [KryoUnitXY, KryoUnitXY]> |
| 62 | + { let Latency = 2; let NumMicroOps = 2; } |
| 63 | +def : WriteRes<WriteIEReg, [KryoUnitXY, KryoUnitXY]> |
| 64 | + { let Latency = 2; let NumMicroOps = 2; } |
| 65 | +def : WriteRes<WriteExtr, [KryoUnitXY, KryoUnitX]> |
| 66 | + { let Latency = 2; let NumMicroOps = 2; } |
| 67 | +def : WriteRes<WriteIS, [KryoUnitXY]> { let Latency = 2; } |
| 68 | +def : WriteRes<WriteID32, [KryoUnitXA, KryoUnitY]> |
| 69 | + { let Latency = 8; let NumMicroOps = 1; } // Fragment -1 |
| 70 | +def : WriteRes<WriteID64, [KryoUnitXA, KryoUnitY]> |
| 71 | + { let Latency = 8; let NumMicroOps = 1; } // Fragment -1 |
| 72 | +def : WriteRes<WriteIM32, [KryoUnitX]> { let Latency = 5; } |
| 73 | +def : WriteRes<WriteIM64, [KryoUnitX]> { let Latency = 5; } |
| 74 | +def : WriteRes<WriteBr, [KryoUnitXY]> { let Latency = 1; } |
| 75 | +def : WriteRes<WriteBrReg, [KryoUnitXY]> { let Latency = 1; } |
| 76 | +def : WriteRes<WriteLD, [KryoUnitLS]> { let Latency = 4; } |
| 77 | +def : WriteRes<WriteST, [KryoUnitLS]> { let Latency = 4; } |
| 78 | +def : WriteRes<WriteSTP, [KryoUnitLS]> { let Latency = 4; } |
| 79 | +def : WriteRes<WriteAdr, [KryoUnitXY]> { let Latency = 6; } |
| 80 | +def : WriteRes<WriteLDIdx, [KryoUnitLS]> { let Latency = 4; } |
| 81 | +def : WriteRes<WriteSTIdx, [KryoUnitLS]> { let Latency = 4; } |
| 82 | +def : WriteRes<WriteF, [KryoUnitXY, KryoUnitXY]> |
| 83 | + { let Latency = 3; let NumMicroOps = 2; } |
| 84 | +def : WriteRes<WriteFCmp, [KryoUnitXY]> { let Latency = 2; } |
| 85 | +def : WriteRes<WriteFCvt, [KryoUnitX]> { let Latency = 4; } |
| 86 | +def : WriteRes<WriteFCopy, [KryoUnitXY]> { let Latency = 6; } |
| 87 | +def : WriteRes<WriteFImm, [KryoUnitXY]> { let Latency = 6; } |
| 88 | +def : WriteRes<WriteFMul, [KryoUnitX, KryoUnitX]> |
| 89 | + { let Latency = 6; let NumMicroOps = 2; } |
| 90 | +def : WriteRes<WriteFDiv, [KryoUnitXA, KryoUnitY]> |
| 91 | + { let Latency = 12; let NumMicroOps = 2; } // Fragment -1 / NoRSV +1 |
| 92 | +def : WriteRes<WriteV, [KryoUnitXY]> { let Latency = 6; } |
| 93 | +def : WriteRes<WriteVLD, [KryoUnitLS]> { let Latency = 4; } |
| 94 | +def : WriteRes<WriteVST, [KryoUnitLS]> { let Latency = 4; } |
| 95 | + |
| 96 | +def : WriteRes<WriteSys, []> { let Latency = 1; } |
| 97 | +def : WriteRes<WriteBarrier, []> { let Latency = 1; } |
| 98 | +def : WriteRes<WriteHint, []> { let Latency = 1; } |
| 99 | + |
| 100 | +def : WriteRes<WriteLDHi, []> { let Latency = 4; } |
| 101 | + |
| 102 | +// No forwarding logic is modelled yet, so every ReadAdvance below is 0. |
| 103 | +def : ReadAdvance<ReadI, 0>; |
| 104 | +def : ReadAdvance<ReadISReg, 0>; |
| 105 | +def : ReadAdvance<ReadIEReg, 0>; |
| 106 | +def : ReadAdvance<ReadIM, 0>; |
| 107 | +def : ReadAdvance<ReadIMA, 0>; |
| 108 | +def : ReadAdvance<ReadID, 0>; |
| 109 | +def : ReadAdvance<ReadExtrHi, 0>; |
| 110 | +def : ReadAdvance<ReadAdrBase, 0>; |
| 111 | +def : ReadAdvance<ReadVLD, 0>; |
| 112 | + |
| 113 | + |
| 114 | +//===----------------------------------------------------------------------===// |
| 115 | +// Specialize the coarse model by associating instruction groups with the |
| 116 | +// subtarget-defined types. As the model is refined, this will override most |
| 117 | +// of the above SchedWriteRes and SchedAlias mappings. |
| 118 | + |
| 119 | +// Miscellaneous (COPY is scheduled as WriteI) |
| 120 | +// ----------------------------------------------------------------------------- |
| 121 | + |
| 122 | +def : InstRW<[WriteI], (instrs COPY)>; |
| 123 | + |
| 124 | + |
| 125 | +// Detailed Refinements |
| 126 | +// ----------------------------------------------------------------------------- |
| 127 | +include "AArch64SchedKryoDetails.td" |
| 128 | + |
| 129 | + |
| 130 | +} // SchedModel = KryoModel |
0 commit comments