Skip to content

Commit b542fb3

Browse files
committed May 28, 2017
[AArch64][Falkor] Fix some sched details.
- Remove all uses of base sched model entries and set them all to Unsupported so all the opcodes are described in AArch64SchedFalkorDetails.td. - Remove entries for unsupported half-float opcodes. - Remove entries for unsupported LSE extension opcodes. - Add entry for MOVbaseTLS (and set Sched in base td file entry to WriteSys) and a few other pseudo ops. - Fix a few FP load/store with reg offset entries to use the LSLfast predicates. - Add Q size BIF/BIT/BSL entries. - Fix swapped Q/D-sized CLS/CLZ/CNT/RBIT entries. - Fix pre/post increment address register latency (this operand is always dest 0). - Fix swapped FCVTHD/FCVTHS/FCVTDH/FCVTDS entries. - Fix XYZ resource over-usage on LD[1-4] opcodes. llvm-svn: 304108
1 parent e38cea0 commit b542fb3

File tree

4 files changed

+461
-294
lines changed

4 files changed

+461
-294
lines changed
 

‎llvm/lib/Target/AArch64/AArch64InstrInfo.td

+1-1
Original file line number | Diff line number | Diff line change
@@ -442,7 +442,7 @@ def MSRpstateImm4 : MSRpstateImm0_15;
442442
// TPIDR_EL0. Add pseudo op so we can mark it as not having any side effects.
443443
let hasSideEffects = 0 in
444444
def MOVbaseTLS : Pseudo<(outs GPR64:$dst), (ins),
445-
[(set GPR64:$dst, AArch64threadpointer)]>, Sched<[]>;
445+
[(set GPR64:$dst, AArch64threadpointer)]>, Sched<[WriteSys]>;
446446

447447
// The cycle counter PMC register is PMCCNTR_EL0.
448448
let Predicates = [HasPerfMon] in

‎llvm/lib/Target/AArch64/AArch64SchedFalkor.td

+36-50
Original file line number | Diff line number | Diff line change
@@ -61,56 +61,42 @@ let SchedModel = FalkorModel in {
6161

6262
let SchedModel = FalkorModel in {
6363

64-
def : WriteRes<WriteImm, [FalkorUnitXYZ]> { let Latency = 1; }
65-
def : WriteRes<WriteI, [FalkorUnitXYZ]> { let Latency = 1; }
66-
def : WriteRes<WriteISReg, [FalkorUnitVXVY, FalkorUnitVXVY]>
67-
{ let Latency = 1; let NumMicroOps = 2; }
68-
def : WriteRes<WriteIEReg, [FalkorUnitXYZ, FalkorUnitXYZ]>
69-
{ let Latency = 2; let NumMicroOps = 2; }
70-
def : WriteRes<WriteExtr, [FalkorUnitXYZ, FalkorUnitXYZ]>
71-
{ let Latency = 2; let NumMicroOps = 2; }
72-
def : WriteRes<WriteIS, [FalkorUnitXYZ]> { let Latency = 1; }
73-
def : WriteRes<WriteID32, [FalkorUnitX, FalkorUnitZ]>
74-
{ let Latency = 8; let NumMicroOps = 2; }
75-
def : WriteRes<WriteID64, [FalkorUnitX, FalkorUnitZ]>
76-
{ let Latency = 16; let NumMicroOps = 2; }
77-
def : WriteRes<WriteIM32, [FalkorUnitX]> { let Latency = 4; }
78-
def : WriteRes<WriteIM64, [FalkorUnitX]> { let Latency = 5; }
79-
def : WriteRes<WriteBr, [FalkorUnitB]> { let Latency = 1; }
80-
def : WriteRes<WriteBrReg, [FalkorUnitB]> { let Latency = 1; }
81-
def : WriteRes<WriteLD, [FalkorUnitLD]> { let Latency = 3; }
82-
def : WriteRes<WriteST, [FalkorUnitST, FalkorUnitSD]>
83-
{ let Latency = 0; let NumMicroOps = 2; }
84-
def : WriteRes<WriteSTP, [FalkorUnitST, FalkorUnitSD]>
85-
{ let Latency = 0; let NumMicroOps = 2; }
86-
def : WriteRes<WriteAdr, [FalkorUnitXYZ]> { let Latency = 1; }
87-
def : WriteRes<WriteLDIdx, [FalkorUnitLD]> { let Latency = 5; }
88-
def : WriteRes<WriteSTIdx, [FalkorUnitST, FalkorUnitSD]>
89-
{ let Latency = 0; let NumMicroOps = 2; }
90-
def : WriteRes<WriteF, [FalkorUnitVXVY, FalkorUnitVXVY]>
91-
{ let Latency = 3; let NumMicroOps = 2; }
92-
def : WriteRes<WriteFCmp, [FalkorUnitVXVY]> { let Latency = 2; }
93-
def : WriteRes<WriteFCvt, [FalkorUnitVXVY]> { let Latency = 4; }
94-
def : WriteRes<WriteFCopy, [FalkorUnitVXVY]> { let Latency = 4; }
95-
def : WriteRes<WriteFImm, [FalkorUnitVXVY]> { let Latency = 4; }
96-
def : WriteRes<WriteFMul, [FalkorUnitVXVY, FalkorUnitVXVY]>
97-
{ let Latency = 6; let NumMicroOps = 2; }
98-
def : WriteRes<WriteFDiv, [FalkorUnitVXVY, FalkorUnitVXVY]>
99-
{ let Latency = 12; let NumMicroOps = 2; } // Fragent -1 / NoRSV +1
100-
def : WriteRes<WriteV, [FalkorUnitVXVY]> { let Latency = 6; }
101-
def : WriteRes<WriteVLD, [FalkorUnitLD]> { let Latency = 3; }
102-
def : WriteRes<WriteVST, [FalkorUnitST, FalkorUnitVSD]>
103-
{ let Latency = 0; let NumMicroOps = 2; }
104-
105-
def : WriteRes<WriteSys, []> { let Latency = 1; }
106-
def : WriteRes<WriteBarrier, []> { let Latency = 1; }
107-
def : WriteRes<WriteHint, []> { let Latency = 1; }
108-
109-
def : WriteRes<WriteLDHi, []> { let Latency = 3; }
110-
111-
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
112-
113-
// No forwarding logic is modelled yet.
64+
// These WriteRes entries are not used in the Falkor sched model.
65+
def : WriteRes<WriteImm, []> { let Unsupported = 1; }
66+
def : WriteRes<WriteI, []> { let Unsupported = 1; }
67+
def : WriteRes<WriteISReg, []> { let Unsupported = 1; }
68+
def : WriteRes<WriteIEReg, []> { let Unsupported = 1; }
69+
def : WriteRes<WriteExtr, []> { let Unsupported = 1; }
70+
def : WriteRes<WriteIS, []> { let Unsupported = 1; }
71+
def : WriteRes<WriteID32, []> { let Unsupported = 1; }
72+
def : WriteRes<WriteID64, []> { let Unsupported = 1; }
73+
def : WriteRes<WriteIM32, []> { let Unsupported = 1; }
74+
def : WriteRes<WriteIM64, []> { let Unsupported = 1; }
75+
def : WriteRes<WriteBr, []> { let Unsupported = 1; }
76+
def : WriteRes<WriteBrReg, []> { let Unsupported = 1; }
77+
def : WriteRes<WriteLD, []> { let Unsupported = 1; }
78+
def : WriteRes<WriteST, []> { let Unsupported = 1; }
79+
def : WriteRes<WriteSTP, []> { let Unsupported = 1; }
80+
def : WriteRes<WriteAdr, []> { let Unsupported = 1; }
81+
def : WriteRes<WriteLDIdx, []> { let Unsupported = 1; }
82+
def : WriteRes<WriteSTIdx, []> { let Unsupported = 1; }
83+
def : WriteRes<WriteF, []> { let Unsupported = 1; }
84+
def : WriteRes<WriteFCmp, []> { let Unsupported = 1; }
85+
def : WriteRes<WriteFCvt, []> { let Unsupported = 1; }
86+
def : WriteRes<WriteFCopy, []> { let Unsupported = 1; }
87+
def : WriteRes<WriteFImm, []> { let Unsupported = 1; }
88+
def : WriteRes<WriteFMul, []> { let Unsupported = 1; }
89+
def : WriteRes<WriteFDiv, []> { let Unsupported = 1; }
90+
def : WriteRes<WriteV, []> { let Unsupported = 1; }
91+
def : WriteRes<WriteVLD, []> { let Unsupported = 1; }
92+
def : WriteRes<WriteVST, []> { let Unsupported = 1; }
93+
def : WriteRes<WriteSys, []> { let Unsupported = 1; }
94+
def : WriteRes<WriteBarrier, []> { let Unsupported = 1; }
95+
def : WriteRes<WriteHint, []> { let Unsupported = 1; }
96+
def : WriteRes<WriteLDHi, []> { let Unsupported = 1; }
97+
def : WriteRes<WriteAtomic, []> { let Unsupported = 1; }
98+
99+
// These ReadAdvance entries are not used in the Falkor sched model.
114100
def : ReadAdvance<ReadI, 0>;
115101
def : ReadAdvance<ReadISReg, 0>;
116102
def : ReadAdvance<ReadIEReg, 0>;

‎llvm/lib/Target/AArch64/AArch64SchedFalkorDetails.td

+347-242
Large diffs are not rendered by default.

‎llvm/lib/Target/AArch64/AArch64SchedFalkorWriteRes.td

+77-1
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,21 @@
2525
// and LSLFast.
2626
//===----------------------------------------------------------------------===//
2727

28+
//===----------------------------------------------------------------------===//
29+
// Define 0 micro-op types
30+
def FalkorWr_none_1cyc : SchedWriteRes<[]> {
31+
let Latency = 1;
32+
let NumMicroOps = 0;
33+
}
34+
def FalkorWr_none_3cyc : SchedWriteRes<[]> {
35+
let Latency = 3;
36+
let NumMicroOps = 0;
37+
}
38+
def FalkorWr_none_4cyc : SchedWriteRes<[]> {
39+
let Latency = 4;
40+
let NumMicroOps = 0;
41+
}
42+
2843
//===----------------------------------------------------------------------===//
2944
// Define 1 micro-op types
3045

@@ -49,6 +64,7 @@ def FalkorWr_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
4964
def FalkorWr_VMUL32_1VXVY_4cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 4; }
5065
def FalkorWr_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
5166
def FalkorWr_FMUL32_1VXVY_5cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 5; }
67+
def FalkorWr_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
5268
def FalkorWr_FMUL64_1VXVY_6cyc : SchedWriteRes<[FalkorUnitVXVY]>{ let Latency = 6; }
5369

5470
def FalkorWr_1LD_0cyc : SchedWriteRes<[FalkorUnitLD]> { let Latency = 0; }
@@ -163,11 +179,13 @@ def FalkorWr_1Z_1XY_0cyc : SchedWriteRes<[FalkorUnitZ, FalkorUnitXY]> {
163179

164180
def FalkorWr_1X_1Z_8cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
165181
let Latency = 8;
182+
let NumMicroOps = 2;
166183
let ResourceCycles = [2, 8];
167184
}
168185

169186
def FalkorWr_1X_1Z_16cyc : SchedWriteRes<[FalkorUnitX, FalkorUnitZ]> {
170187
let Latency = 16;
188+
let NumMicroOps = 2;
171189
let ResourceCycles = [2, 16];
172190
}
173191

@@ -309,6 +327,12 @@ def FalkorWr_2LD_1ST_1SD_3cyc: SchedWriteRes<[FalkorUnitLD, FalkorUnitST,
309327
let NumMicroOps = 4;
310328
}
311329

330+
def FalkorWr_2VSD_2ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
331+
FalkorUnitST, FalkorUnitVSD]> {
332+
let Latency = 0;
333+
let NumMicroOps = 4;
334+
}
335+
312336
//===----------------------------------------------------------------------===//
313337
// Define 5 micro-op types
314338

@@ -335,7 +359,12 @@ def FalkorWr_1XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
335359
let Latency = 0;
336360
let NumMicroOps = 5;
337361
}
338-
362+
def FalkorWr_1VXVY_2ST_2VSD_0cyc : SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
363+
FalkorUnitVSD, FalkorUnitST,
364+
FalkorUnitVSD]> {
365+
let Latency = 0;
366+
let NumMicroOps = 5;
367+
}
339368
//===----------------------------------------------------------------------===//
340369
// Define 6 micro-op types
341370

@@ -352,6 +381,20 @@ def FalkorWr_2XYZ_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitXYZ, FalkorUnitST,
352381
let NumMicroOps = 6;
353382
}
354383

384+
def FalkorWr_2VXVY_2ST_2VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
385+
FalkorUnitVSD, FalkorUnitVXVY,
386+
FalkorUnitST, FalkorUnitVSD]> {
387+
let Latency = 0;
388+
let NumMicroOps = 6;
389+
}
390+
391+
def FalkorWr_3VSD_3ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
392+
FalkorUnitST, FalkorUnitVSD,
393+
FalkorUnitST, FalkorUnitVSD]> {
394+
let Latency = 0;
395+
let NumMicroOps = 6;
396+
}
397+
355398
//===----------------------------------------------------------------------===//
356399
// Define 8 micro-op types
357400

@@ -363,6 +406,14 @@ def FalkorWr_2LD_2VXVY_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD, FalkorUnitLD,
363406
let NumMicroOps = 8;
364407
}
365408

409+
def FalkorWr_4VSD_4ST_0cyc: SchedWriteRes<[FalkorUnitST, FalkorUnitVSD,
410+
FalkorUnitST, FalkorUnitVSD,
411+
FalkorUnitST, FalkorUnitVSD,
412+
FalkorUnitST, FalkorUnitVSD]> {
413+
let Latency = 0;
414+
let NumMicroOps = 8;
415+
}
416+
366417
//===----------------------------------------------------------------------===//
367418
// Define 9 micro-op types
368419

@@ -384,6 +435,31 @@ def FalkorWr_2LD_2VXVY_1XYZ_2LD_2VXVY_4cyc:SchedWriteRes<[FalkorUnitLD,
384435
let NumMicroOps = 9;
385436
}
386437

438+
//===----------------------------------------------------------------------===//
439+
// Define 10 micro-op types
440+
441+
def FalkorWr_2VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
442+
FalkorUnitVSD, FalkorUnitVXVY,
443+
FalkorUnitST, FalkorUnitVSD,
444+
FalkorUnitST, FalkorUnitVSD,
445+
FalkorUnitST, FalkorUnitVSD]> {
446+
let Latency = 0;
447+
let NumMicroOps = 10;
448+
}
449+
450+
//===----------------------------------------------------------------------===//
451+
// Define 12 micro-op types
452+
453+
def FalkorWr_4VXVY_4ST_4VSD_0cyc: SchedWriteRes<[FalkorUnitVXVY, FalkorUnitST,
454+
FalkorUnitVSD, FalkorUnitVXVY,
455+
FalkorUnitST, FalkorUnitVSD,
456+
FalkorUnitVXVY, FalkorUnitST,
457+
FalkorUnitVSD, FalkorUnitVXVY,
458+
FalkorUnitST, FalkorUnitVSD]> {
459+
let Latency = 0;
460+
let NumMicroOps = 12;
461+
}
462+
387463
// Forwarding logic is modeled for multiply add/accumulate.
388464
// -----------------------------------------------------------------------------
389465
def FalkorReadIMA32 : SchedReadAdvance<3, [FalkorWr_IMUL32_1X_2cyc]>;

0 commit comments

Comments
 (0)
Please sign in to comment.