|
| 1 | +//=- AArch64SchedM1.td - Samsung Exynos-M1 Scheduling Defs ---*- tablegen -*-=// |
| 2 | +// |
| 3 | +// The LLVM Compiler Infrastructure |
| 4 | +// |
| 5 | +// This file is distributed under the University of Illinois Open Source |
| 6 | +// License. See LICENSE.TXT for details. |
| 7 | +// |
| 8 | +//===----------------------------------------------------------------------===// |
| 9 | +// |
| 10 | +// This file defines the machine model for Samsung Exynos-M1 to support |
| 11 | +// instruction scheduling and other instruction cost heuristics. |
| 12 | +// |
| 13 | +//===----------------------------------------------------------------------===// |
| 14 | + |
| 15 | +//===----------------------------------------------------------------------===// |
| 16 | +// The Exynos-M1 is a traditional superscalar microprocessor with a |
| 17 | +// 4-wide in-order stage for decode and dispatch and a wider issue stage. |
| 18 | +// The execution units and loads and stores are out-of-order. |
| 19 | + |
| 20 | +def ExynosM1Model : SchedMachineModel { |
| 21 | + let IssueWidth = 4; // Up to 4 uops per cycle (the 4-wide in-order decode and dispatch stage). |
| 22 | + let MinLatency = 0; // Out-of-order (OoO) execution. |
| 23 | + let MicroOpBufferSize = 96; // Reorder buffer (ROB) size. |
| 24 | + let LoopMicroOpBufferSize = 32; // Instruction queue size. |
| 25 | + let LoadLatency = 4; // Load latency in the optimistic cases. |
| 26 | + let MispredictPenalty = 14; // Minimum branch misprediction penalty. |
| 27 | + let CompleteModel = 0; // Model is not marked complete; use the default model otherwise. |
| 28 | +} |
| 29 | + |
| 30 | +//===----------------------------------------------------------------------===// |
| 31 | +// Define each kind of processor resource and the number available on the Exynos-M1, |
| 32 | +// which has 9 pipelines (matching the counts below: 2 A + 1 C + 2 B + 1 L + 1 S + F0 + F1), each with its own queue with out-of-order dispatch. |
| 33 | + |
| 34 | +def M1UnitA : ProcResource<2>; // Simple integer |
| 35 | +def M1UnitC : ProcResource<1>; // Simple and complex integer |
| 36 | +def M1UnitB : ProcResource<2>; // Branch |
| 37 | +def M1UnitL : ProcResource<1>; // Load |
| 38 | +def M1UnitS : ProcResource<1>; // Store |
| 39 | +def M1PipeF0 : ProcResource<1>; // FP #0 |
| 40 | +def M1PipeF1 : ProcResource<1>; // FP #1 |
| 41 | + |
| 42 | +let Super = M1PipeF0 in { // Units whose super-resource is FP pipe #0. |
| 43 | + def M1UnitFMAC : ProcResource<1>; // FP multiplication |
| 44 | + def M1UnitFCVT : ProcResource<1>; // FP conversion |
| 45 | + def M1UnitNAL0 : ProcResource<1>; // Simple vector |
| 46 | + def M1UnitNMISC : ProcResource<1>; // Miscellaneous |
| 47 | + def M1UnitNCRYPT : ProcResource<1>; // Cryptographic |
| 48 | +} |
| 49 | + |
| 50 | +let Super = M1PipeF1 in { // Units whose super-resource is FP pipe #1. |
| 51 | + def M1UnitFADD : ProcResource<1>; // Simple FP |
| 52 | + let BufferSize = 1 in // Brace-less let: applies only to the next def (M1UnitFVAR). |
| 53 | + def M1UnitFVAR : ProcResource<1>; // FP division & square root (serialized) |
| 54 | + def M1UnitNAL1 : ProcResource<1>; // Simple vector |
| 55 | + def M1UnitFST : ProcResource<1>; // FP store |
| 56 | +} |
| 57 | + |
| 58 | +let SchedModel = ExynosM1Model in { |
| 59 | + def M1UnitALU : ProcResGroup<[M1UnitA, |
| 60 | + M1UnitC]>; // All simple integer (either A or C). |
| 61 | + def M1UnitNALU : ProcResGroup<[M1UnitNAL0, |
| 62 | + M1UnitNAL1]>; // All simple vector (either NAL0 or NAL1). |
| 63 | +} |
| 64 | + |
| 65 | +let SchedModel = ExynosM1Model in { |
| 66 | + |
| 67 | +//===----------------------------------------------------------------------===// |
| 68 | +// Coarse scheduling model for the Exynos-M1. |
| 69 | + |
| 70 | +// Branch instructions. |
| 71 | +// TODO: Unconditional direct branches take zero cycles and use no units. |
| 72 | +def : WriteRes<WriteBr, [M1UnitB]> { let Latency = 1; } |
| 73 | +def : WriteRes<WriteBrReg, [M1UnitC]> { let Latency = 1; } |
| 74 | +// TODO: Branch and link is very different. |
| 75 | + |
| 76 | +// Arithmetic and logical integer instructions. |
| 77 | +def : WriteRes<WriteI, [M1UnitALU]> { let Latency = 1; } |
| 78 | +// TODO: Shifts by more than 3 and some extensions take 2 cycles. |
| 79 | +def : WriteRes<WriteISReg, [M1UnitALU]> { let Latency = 1; } |
| 80 | +def : WriteRes<WriteIEReg, [M1UnitALU]> { let Latency = 1; } |
| 81 | +def : WriteRes<WriteIS, [M1UnitALU]> { let Latency = 1; } |
| 82 | + |
| 83 | +// Move instructions. |
| 84 | +def : WriteRes<WriteImm, [M1UnitALU]> { let Latency = 1; } |
| 85 | + |
| 86 | +// Divide and multiply instructions. |
| 87 | +// TODO: Division blocks the divider inside C. |
| 88 | +def : WriteRes<WriteID32, [M1UnitC]> { let Latency = 13; } |
| 89 | +def : WriteRes<WriteID64, [M1UnitC]> { let Latency = 21; } |
| 90 | +// TODO: Long multiplication takes 5 cycles and also uses the ALU. |
| 91 | +// TODO: Multiplication with accumulation can be advanced. |
| 92 | +def : WriteRes<WriteIM32, [M1UnitC]> { let Latency = 3; } |
| 93 | +// TODO: 64-bit multiplication has a throughput of 1/2. |
| 94 | +def : WriteRes<WriteIM64, [M1UnitC]> { let Latency = 4; } |
| 95 | + |
| 96 | +// Miscellaneous instructions. |
| 97 | +def : WriteRes<WriteExtr, [M1UnitALU, |
| 98 | + M1UnitALU]> { let Latency = 2; } |
| 99 | + |
| 100 | +// TODO: The latency for the post- or pre-indexed register is 1 cycle. |
| 101 | +def : WriteRes<WriteAdr, []> { let Latency = 0; } |
| 102 | + |
| 103 | +// Load instructions. |
| 104 | +def : WriteRes<WriteLD, [M1UnitL]> { let Latency = 4; } |
| 105 | +// TODO: Extended addressing also requires the ALU. |
| 106 | +def : WriteRes<WriteLDIdx, [M1UnitL]> { let Latency = 5; } |
| 107 | +def : WriteRes<WriteLDHi, [M1UnitALU]> { let Latency = 4; } |
| 108 | + |
| 109 | +// Store instructions. |
| 110 | +def : WriteRes<WriteST, [M1UnitS]> { let Latency = 1; } |
| 111 | +// TODO: Extended addressing also requires the ALU. |
| 112 | +def : WriteRes<WriteSTIdx, [M1UnitS]> { let Latency = 1; } |
| 113 | +def : WriteRes<WriteSTP, [M1UnitS]> { let Latency = 1; } |
| 114 | +def : WriteRes<WriteSTX, [M1UnitS]> { let Latency = 1; } |
| 115 | + |
| 116 | +// FP data instructions. |
| 117 | +def : WriteRes<WriteF, [M1UnitFADD]> { let Latency = 3; } |
| 118 | +// TODO: FCCMP is very different. |
| 119 | +def : WriteRes<WriteFCmp, [M1UnitNMISC]> { let Latency = 4; } |
| 120 | +// TODO: Double precision (DP) takes longer. |
| 121 | +def : WriteRes<WriteFDiv, [M1UnitFVAR]> { let Latency = 15; } |
| 122 | +// TODO: Multiply-accumulate (MACC) takes longer. |
| 123 | +def : WriteRes<WriteFMul, [M1UnitFMAC]> { let Latency = 4; } |
| 124 | + |
| 125 | +// FP miscellaneous instructions. |
| 126 | +// TODO: Conversion between register files is very different. |
| 127 | +def : WriteRes<WriteFCvt, [M1UnitFCVT]> { let Latency = 3; } |
| 128 | +def : WriteRes<WriteFImm, [M1UnitNALU]> { let Latency = 1; } |
| 129 | +// TODO: Copy from FPR to GPR is very different. |
| 130 | +def : WriteRes<WriteFCopy, [M1UnitS]> { let Latency = 4; } |
| 131 | + |
| 132 | +// FP load instructions. |
| 133 | +// TODO: ASIMD loads are very different. |
| 134 | +def : WriteRes<WriteVLD, [M1UnitL]> { let Latency = 5; } |
| 135 | + |
| 136 | +// FP store instructions. |
| 137 | +// TODO: ASIMD stores are very different. |
| 138 | +def : WriteRes<WriteVST, [M1UnitS, M1UnitFST]> { let Latency = 1; } |
| 139 | + |
| 140 | +// ASIMD FP instructions. |
| 141 | +// TODO: Other operations are very different. |
| 142 | +def : WriteRes<WriteV, [M1UnitFADD]> { let Latency = 3; } |
| 143 | + |
| 144 | +// Other miscellaneous instructions. |
| 145 | +def : WriteRes<WriteSys, []> { let Latency = 1; } |
| 146 | +def : WriteRes<WriteBarrier, []> { let Latency = 1; } |
| 147 | +def : WriteRes<WriteHint, []> { let Latency = 1; } |
| 148 | + |
| 149 | +//===----------------------------------------------------------------------===// |
| 150 | +// Fast forwarding. |
| 151 | + |
| 152 | +// TODO: Add FP register forwarding rules. |
| 153 | + |
| 154 | +def : ReadAdvance<ReadI, 0>; |
| 155 | +def : ReadAdvance<ReadISReg, 0>; |
| 156 | +def : ReadAdvance<ReadIEReg, 0>; |
| 157 | +def : ReadAdvance<ReadIM, 0>; |
| 158 | +// Integer multiply-accumulate: the accumulator read is advanced by 2 cycles after WriteIM32/WriteIM64. |
| 159 | +// TODO: The forwarding for WriteIM64 actually saves 3 cycles. |
| 160 | +def : ReadAdvance<ReadIMA, 2, [WriteIM32, WriteIM64]>; |
| 161 | +def : ReadAdvance<ReadID, 0>; |
| 162 | +def : ReadAdvance<ReadExtrHi, 0>; |
| 163 | +def : ReadAdvance<ReadAdrBase, 0>; |
| 164 | +def : ReadAdvance<ReadVLD, 0>; |
| 165 | + |
| 166 | +//===----------------------------------------------------------------------===// |
| 167 | +// Finer scheduling model for the Exynos-M1: named write resources used by the InstRW overrides that follow. |
| 168 | + |
| 169 | +def M1WriteNEONA : SchedWriteRes<[M1UnitNALU, |
| 170 | + M1UnitNALU, |
| 171 | + M1UnitFADD]> { let Latency = 9; } |
| 172 | +def M1WriteNEONB : SchedWriteRes<[M1UnitNALU, |
| 173 | + M1UnitFST]> { let Latency = 5; } |
| 174 | +def M1WriteNEONC : SchedWriteRes<[M1UnitNALU, |
| 175 | + M1UnitFST]> { let Latency = 6; } |
| 176 | +def M1WriteNEOND : SchedWriteRes<[M1UnitNALU, |
| 177 | + M1UnitFST, |
| 178 | + M1UnitL]> { let Latency = 10; } |
| 179 | +def M1WriteNEONE : SchedWriteRes<[M1UnitFCVT, |
| 180 | + M1UnitFST]> { let Latency = 8; } |
| 181 | +def M1WriteNEONF : SchedWriteRes<[M1UnitFCVT, |
| 182 | + M1UnitFST, |
| 183 | + M1UnitL]> { let Latency = 13; } |
| 184 | +def M1WriteNEONG : SchedWriteRes<[M1UnitNMISC, |
| 185 | + M1UnitFST]> { let Latency = 6; } |
| 186 | +def M1WriteNEONH : SchedWriteRes<[M1UnitNALU, |
| 187 | + M1UnitFST]> { let Latency = 3; } |
| 188 | +def M1WriteNEONI : SchedWriteRes<[M1UnitFST, |
| 189 | + M1UnitL]> { let Latency = 9; } |
| 190 | +def M1WriteALU1 : SchedWriteRes<[M1UnitALU]> { let Latency = 1; } |
| 191 | +def M1WriteB : SchedWriteRes<[M1UnitB]> { let Latency = 1; } |
| 192 | +// FIXME: This models the worst case, a conditional branch and link. |
| 193 | +def M1WriteBL : SchedWriteRes<[M1UnitB, |
| 194 | + M1UnitALU]> { let Latency = 1; } |
| 195 | +// FIXME: This models the worst case, when using LR. |
| 196 | +def M1WriteBLR : SchedWriteRes<[M1UnitB, |
| 197 | + M1UnitALU, |
| 198 | + M1UnitALU]> { let Latency = 2; } |
| 199 | +def M1WriteC1 : SchedWriteRes<[M1UnitC]> { let Latency = 1; } |
| 200 | +def M1WriteC2 : SchedWriteRes<[M1UnitC]> { let Latency = 2; } |
| 201 | +def M1WriteFADD3 : SchedWriteRes<[M1UnitFADD]> { let Latency = 3; } |
| 202 | +def M1WriteFCVT3 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 3; } |
| 203 | +def M1WriteFCVT4 : SchedWriteRes<[M1UnitFCVT]> { let Latency = 4; } |
| 204 | +def M1WriteFMAC4 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 4; } |
| 205 | +def M1WriteFMAC5 : SchedWriteRes<[M1UnitFMAC]> { let Latency = 5; } |
| 206 | +def M1WriteFVAR15 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 15; } |
| 207 | +def M1WriteFVAR23 : SchedWriteRes<[M1UnitFVAR]> { let Latency = 23; } |
| 208 | +def M1WriteNALU1 : SchedWriteRes<[M1UnitNALU]> { let Latency = 1; } |
| 209 | +def M1WriteNALU2 : SchedWriteRes<[M1UnitNALU]> { let Latency = 2; } |
| 210 | +def M1WriteNAL11 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 1; } |
| 211 | +def M1WriteNAL12 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 2; } |
| 212 | +def M1WriteNAL13 : SchedWriteRes<[M1UnitNAL1]> { let Latency = 3; } |
| 213 | +def M1WriteNCRYPT1 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 1; } |
| 214 | +def M1WriteNCRYPT5 : SchedWriteRes<[M1UnitNCRYPT]> { let Latency = 5; } |
| 215 | +def M1WriteNMISC1 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 1; } |
| 216 | +def M1WriteNMISC2 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 2; } |
| 217 | +def M1WriteNMISC3 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 3; } |
| 218 | +def M1WriteNMISC4 : SchedWriteRes<[M1UnitNMISC]> { let Latency = 4; } |
| 219 | +def M1WriteS4 : SchedWriteRes<[M1UnitS]> { let Latency = 4; } |
| 220 | +def M1WriteTB : SchedWriteRes<[M1UnitC, |
| 221 | + M1UnitALU]> { let Latency = 2; } |
| 222 | + |
| 223 | +// Branch instructions. |
| 224 | +def : InstRW<[M1WriteB ], (instrs Bcc)>; |
| 225 | +def : InstRW<[M1WriteBL], (instrs BL)>; |
| 226 | +def : InstRW<[M1WriteBLR], (instrs BLR)>; |
| 227 | +def : InstRW<[M1WriteC1], (instregex "^CBN?Z[WX]")>; |
| 228 | +def : InstRW<[M1WriteTB], (instregex "^TBN?Z[WX]")>; |
| 229 | + |
| 230 | +// Arithmetic and logical integer instructions. |
| 231 | +def : InstRW<[M1WriteALU1], (instrs COPY)>; |
| 232 | + |
| 233 | +// Divide and multiply instructions (no per-instruction overrides yet). |
| 234 | + |
| 235 | +// Miscellaneous instructions (no per-instruction overrides yet). |
| 236 | + |
| 237 | +// Load instructions (no per-instruction overrides yet). |
| 238 | + |
| 239 | +// Store instructions (no per-instruction overrides yet). |
| 240 | + |
| 241 | +// FP data instructions. |
| 242 | +def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)[DS]r")>; |
| 243 | +def : InstRW<[M1WriteFADD3], (instregex "^F(ADD|SUB)[DS]rr")>; |
| 244 | +def : InstRW<[M1WriteNEONG], (instregex "^FCCMPE?[DS]rr")>; |
| 245 | +def : InstRW<[M1WriteNMISC4], (instregex "^FCMPE?[DS]r")>; |
| 246 | +def : InstRW<[M1WriteFVAR15], (instrs FDIVSrr)>; |
| 247 | +def : InstRW<[M1WriteFVAR23], (instrs FDIVDrr)>; |
| 248 | +def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN).+rr")>; |
| 249 | +def : InstRW<[M1WriteFMAC4], (instregex "^FN?MUL[DS]rr")>; |
| 250 | +def : InstRW<[M1WriteFMAC5], (instregex "^FN?M(ADD|SUB)[DS]rrr")>; |
| 251 | +def : InstRW<[M1WriteFCVT3], (instregex "^FRINT.+r")>; |
| 252 | +def : InstRW<[M1WriteNEONH], (instregex "^FCSEL[DS]rrr")>; |
| 253 | +def : InstRW<[M1WriteFVAR15], (instrs FSQRTSr)>; |
| 254 | +def : InstRW<[M1WriteFVAR23], (instrs FSQRTDr)>; |
| 255 | + |
| 256 | +// FP miscellaneous instructions. |
| 257 | +def : InstRW<[M1WriteFCVT3], (instregex "^FCVT[DS][DS]r")>; |
| 258 | +def : InstRW<[M1WriteNEONF], (instregex "^[FSU]CVT[AMNPZ][SU](_Int)?[SU]?[XW]?[DS]?[rds]i?")>; |
| 259 | +def : InstRW<[M1WriteNEONE], (instregex "^[SU]CVTF[SU]")>; |
| 260 | +def : InstRW<[M1WriteNALU1], (instregex "^FMOV[DS][ir]")>; |
| 261 | +def : InstRW<[M1WriteS4], (instregex "^FMOV[WX][DS](High)?r")>; |
| 262 | +def : InstRW<[M1WriteNEONI], (instregex "^FMOV[DS][WX](High)?r")>; |
| 263 | + |
| 264 | +// FP load instructions (no per-instruction overrides yet). |
| 265 | + |
| 266 | +// FP store instructions (no per-instruction overrides yet). |
| 267 | + |
| 268 | +// ASIMD instructions (selected by opcode-name regex). |
| 269 | +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]ABAL?v")>; |
| 270 | +def : InstRW<[M1WriteNMISC1], (instregex "^[SU]ABDL?v")>; |
| 271 | +def : InstRW<[M1WriteNMISC1], (instregex "^(SQ)?ABSv")>; |
| 272 | +def : InstRW<[M1WriteNMISC1], (instregex "^SQNEGv")>; |
| 273 | +def : InstRW<[M1WriteNALU1], (instregex "^(ADD|NEG|SUB)v")>; |
| 274 | +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?H(ADD|SUB)v")>; |
| 275 | +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?AD[AD](L|LP|P|W)V?2?v")>; |
| 276 | +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]?SUB[LW]2?v")>; |
| 277 | +def : InstRW<[M1WriteNMISC3], (instregex "^R?(ADD|SUB)HN?2?v")>; |
| 278 | +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]+Q(ADD|SUB)v")>; |
| 279 | +def : InstRW<[M1WriteNMISC3], (instregex "^[SU]RHADDv")>; |
| 280 | +def : InstRW<[M1WriteNMISC1], (instregex "^CM(EQ|GE|GT|HI|HS|LE|LT)v")>; |
| 281 | +def : InstRW<[M1WriteNALU1], (instregex "^CMTSTv")>; |
| 282 | +def : InstRW<[M1WriteNALU1], (instregex "^(AND|BIC|EOR|MVNI|NOT|ORN|ORR)v")>; |
| 283 | +def : InstRW<[M1WriteNMISC1], (instregex "^[SU](MIN|MAX)v")>; |
| 284 | +def : InstRW<[M1WriteNMISC2], (instregex "^[SU](MIN|MAX)Pv")>; |
| 285 | +def : InstRW<[M1WriteNMISC3], (instregex "^[SU](MIN|MAX)Vv")>; |
| 286 | +def : InstRW<[M1WriteNMISC4], (instregex "^(MUL|SQR?DMULH)v")>; |
| 287 | +def : InstRW<[M1WriteNMISC4], (instregex "^ML[AS]v")>; |
| 288 | +def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD|SQRD)ML[AS][HL]v")>; |
| 289 | +def : InstRW<[M1WriteNMISC4], (instregex "^(S|U|SQD)MULLv")>; |
| 290 | +def : InstRW<[M1WriteNAL13], (instregex "^(S|SR|U|UR)SRAv")>; |
| 291 | +def : InstRW<[M1WriteNALU1], (instregex "^[SU]?SH(L|LL|R)2?v")>; |
| 292 | +def : InstRW<[M1WriteNALU1], (instregex "^S[LR]Iv")>; |
| 293 | +def : InstRW<[M1WriteNAL13], (instregex "^[SU]?(Q|QR|R)?SHR(N|U|UN)?2?v")>; // NOTE(review): names starting with SSHRv/USHRv also match the "^[SU]?SH(L|LL|R)2?v" pattern two lines up (M1WriteNALU1); confirm which mapping is intended. |
| 294 | +def : InstRW<[M1WriteNAL13], (instregex "^[SU](Q|QR|R)SHLU?v")>; |
| 295 | + |
| 296 | +// ASIMD FP instructions (selected by opcode-name regex; every pattern is anchored with '^'). |
| 297 | +def : InstRW<[M1WriteNALU1], (instregex "^F(ABS|NEG)v")>; |
| 298 | +def : InstRW<[M1WriteNMISC3], (instregex "^F(ABD|ADD|SUB)v")>; |
| 299 | +def : InstRW<[M1WriteNEONA], (instregex "^FADDP")>; |
| 300 | +def : InstRW<[M1WriteNMISC1], (instregex "^F(AC|CM)(EQ|GE|GT|LE|LT)v[^1]")>; |
| 301 | +def : InstRW<[M1WriteFCVT3], (instregex "^[FVSU]CVTX?[AFLMNPZ][SU]?(_Int)?v")>; |
| 302 | +def : InstRW<[M1WriteFVAR15], (instregex "^FDIVv.f32")>; |
| 303 | +def : InstRW<[M1WriteFVAR23], (instregex "^FDIVv2f64")>; |
| 304 | +def : InstRW<[M1WriteFVAR15], (instregex "^FSQRTv.f32")>; |
| 305 | +def : InstRW<[M1WriteFVAR23], (instregex "^FSQRTv2f64")>; |
| 306 | +def : InstRW<[M1WriteNMISC1], (instregex "^F(MAX|MIN)(NM)?V?v")>; |
| 307 | +def : InstRW<[M1WriteNMISC2], (instregex "^F(MAX|MIN)(NM)?Pv")>; |
| 308 | +def : InstRW<[M1WriteFMAC4], (instregex "^FMULX?v")>; |
| 309 | +def : InstRW<[M1WriteFMAC5], (instregex "^FML[AS]v")>; |
| 310 | +def : InstRW<[M1WriteFCVT3], (instregex "^FRINT[AIMNPXZ]v")>; |
| 311 | + |
| 312 | +// ASIMD miscellaneous instructions. |
| 313 | +def : InstRW<[M1WriteNALU1], (instregex "^RBITv")>; |
| 314 | +def : InstRW<[M1WriteNAL11], (instregex "^(BIF|BIT|BSL)v")>; |
| 315 | +def : InstRW<[M1WriteNALU1], (instregex "^CPY")>; |
| 316 | +def : InstRW<[M1WriteNEONB], (instregex "^DUPv.+gpr")>; |
| 317 | +def : InstRW<[M1WriteNALU1], (instregex "^DUPv.+lane")>; |
| 318 | +def : InstRW<[M1WriteNAL13], (instregex "^[SU]?Q?XTU?Nv")>; |
| 319 | +def : InstRW<[M1WriteNEONC], (instregex "^INSv.+gpr")>; |
| 320 | +def : InstRW<[M1WriteFCVT4], (instregex "^[FU](RECP|RSQRT)Ev")>; |
| 321 | +def : InstRW<[M1WriteNMISC1], (instregex "^[FU](RECP|RSQRT)Xv")>; |
| 322 | +def : InstRW<[M1WriteFMAC5], (instregex "^F(RECP|RSQRT)Sv")>; |
| 323 | +def : InstRW<[M1WriteNALU1], (instregex "^REV(16|32|64)v")>; |
| 324 | +def : InstRW<[M1WriteNAL11], (instregex "^TB[LX]v8i8One")>; // TBL/TBX: the WriteSequence repeat count below follows the One/Two/Three/Four suffix. |
| 325 | +def : InstRW<[WriteSequence<[M1WriteNAL11], 2>], |
| 326 | + (instregex "^TB[LX]v8i8Two")>; |
| 327 | +def : InstRW<[WriteSequence<[M1WriteNAL11], 3>], |
| 328 | + (instregex "^TB[LX]v8i8Three")>; |
| 329 | +def : InstRW<[WriteSequence<[M1WriteNAL11], 4>], |
| 330 | + (instregex "^TB[LX]v8i8Four")>; |
| 331 | +def : InstRW<[M1WriteNAL12], (instregex "^TB[LX]v16i8One")>; // Same scheme for the v16i8 forms, using the 2-cycle M1WriteNAL12. |
| 332 | +def : InstRW<[WriteSequence<[M1WriteNAL12], 2>], |
| 333 | + (instregex "^TB[LX]v16i8Two")>; |
| 334 | +def : InstRW<[WriteSequence<[M1WriteNAL12], 3>], |
| 335 | + (instregex "^TB[LX]v16i8Three")>; |
| 336 | +def : InstRW<[WriteSequence<[M1WriteNAL12], 4>], |
| 337 | + (instregex "^TB[LX]v16i8Four")>; |
| 338 | +def : InstRW<[M1WriteNEOND], (instregex "^[SU]MOVv")>; |
| 339 | +def : InstRW<[M1WriteNALU1], (instregex "^INSv.+lane")>; |
| 340 | +def : InstRW<[M1WriteNALU1], (instregex "^(TRN|UZP)(1|2)(v8i8|v4i16|v2i32)")>; |
| 341 | +def : InstRW<[M1WriteNALU2], (instregex "^(TRN|UZP)(1|2)(v16i8|v8i16|v4i32|v2i64)")>; |
| 342 | +def : InstRW<[M1WriteNALU1], (instregex "^ZIP(1|2)v")>; |
| 343 | + |
| 344 | +// ASIMD load instructions (no per-instruction overrides yet). |
| 345 | + |
| 346 | +// ASIMD store instructions (no per-instruction overrides yet). |
| 347 | + |
| 348 | +// Cryptography instructions. |
| 349 | +def : InstRW<[M1WriteNCRYPT1], (instregex "^AES")>; |
| 350 | +def : InstRW<[M1WriteNCRYPT1], (instregex "^PMUL")>; |
| 351 | +def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA1(H|SU)")>; |
| 352 | +def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA1[CMP]")>; |
| 353 | +def : InstRW<[M1WriteNCRYPT1], (instregex "^SHA256SU0")>; |
| 354 | +def : InstRW<[M1WriteNCRYPT5], (instregex "^SHA256(H|SU1)")>; |
| 355 | + |
| 356 | +// CRC instructions. |
| 357 | +def : InstRW<[M1WriteC2], (instregex "^CRC32")>; |
| 358 | + |
| 359 | +} // SchedModel = ExynosM1Model |
0 commit comments