@@ -168,20 +168,15 @@ Analysis::makePointsPerSchedClass() const {
168
168
return PointsPerSchedClass;
169
169
}
170
170
171
- void Analysis::printSchedClassClustersHtml (std::vector<size_t > PointIds,
172
- llvm::raw_ostream &OS) const {
173
- assert (!PointIds.empty ());
174
- // Sort the points by cluster id so that we can display them grouped by
175
- // cluster.
176
- llvm::sort (PointIds.begin (), PointIds.end (),
177
- [this ](const size_t A, const size_t B) {
178
- return Clustering_.getClusterIdForPoint (A) <
179
- Clustering_.getClusterIdForPoint (B);
180
- });
171
+ void Analysis::printSchedClassClustersHtml (
172
+ const std::vector<SchedClassCluster> &Clusters, const SchedClass &SC,
173
+ llvm::raw_ostream &OS) const {
181
174
const auto &Points = Clustering_.getPoints ();
182
175
OS << " <table class=\" sched-class-clusters\" >" ;
183
176
OS << " <tr><th>ClusterId</th><th>Opcode/Config</th>" ;
184
- for (const auto &Measurement : Points[PointIds[0 ]].Measurements ) {
177
+ assert (!Clusters.empty ());
178
+ for (const auto &Measurement :
179
+ Points[Clusters[0 ].getPointIds ()[0 ]].Measurements ) {
185
180
OS << " <th>" ;
186
181
if (Measurement.DebugString .empty ())
187
182
writeEscaped<kEscapeHtml >(OS, Measurement.Key );
@@ -190,29 +185,24 @@ void Analysis::printSchedClassClustersHtml(std::vector<size_t> PointIds,
190
185
OS << " </th>" ;
191
186
}
192
187
OS << " </tr>" ;
193
- for (size_t I = 0 , E = PointIds.size (); I < E;) {
194
- const auto &CurrentClusterId =
195
- Clustering_.getClusterIdForPoint (PointIds[I]);
196
- OS << " <tr><td>" ;
197
- writeClusterId<kEscapeHtml >(OS, CurrentClusterId);
188
+ for (const SchedClassCluster &Cluster : Clusters) {
189
+ OS << " <tr class=\" "
190
+ << (Cluster.measurementsMatch (*SubtargetInfo_, SC, Clustering_)
191
+ ? " good-cluster"
192
+ : " bad-cluster" )
193
+ << " \" ><td>" ;
194
+ writeClusterId<kEscapeHtml >(OS, Cluster.id ());
198
195
OS << " </td><td><ul>" ;
199
- std::vector<BenchmarkMeasureStats> MeasurementStats (
200
- Points[PointIds[I]].Measurements .size ());
201
- for (; I < E &&
202
- Clustering_.getClusterIdForPoint (PointIds[I]) == CurrentClusterId;
203
- ++I) {
204
- const auto &Point = Points[PointIds[I]];
196
+ for (const size_t PointId : Cluster.getPointIds ()) {
197
+ const auto &Point = Points[PointId];
205
198
OS << " <li><span class=\" mono\" >" ;
206
199
writeEscaped<kEscapeHtml >(OS, Point .Key .OpcodeName );
207
200
OS << " </span> <span class=\" mono\" >" ;
208
201
writeEscaped<kEscapeHtml >(OS, Point .Key .Config );
209
202
OS << " </span></li>" ;
210
- for (size_t J = 0 , F = Point .Measurements .size (); J < F; ++J) {
211
- MeasurementStats[J].push (Point .Measurements [J]);
212
- }
213
203
}
214
204
OS << " </ul></td>" ;
215
- for (const auto &Stats : MeasurementStats ) {
205
+ for (const auto &Stats : Cluster. getRepresentative () ) {
216
206
OS << " <td class=\" measurement\" >" ;
217
207
writeMeasurementValue<kEscapeHtml >(OS, Stats.avg ());
218
208
OS << " <br><span class=\" minmax\" >[" ;
@@ -300,25 +290,101 @@ getNonRedundantWriteProcRes(const llvm::MCSchedClassDesc &SCDesc,
300
290
return Result;
301
291
}
302
292
303
- void Analysis::printSchedClassDescHtml (const llvm::MCSchedClassDesc &SCDesc,
293
+ Analysis::SchedClass::SchedClass (const llvm::MCSchedClassDesc &SD,
294
+ const llvm::MCSubtargetInfo &STI)
295
+ : SCDesc(SD),
296
+ NonRedundantWriteProcRes (getNonRedundantWriteProcRes(SD, STI)),
297
+ IdealizedProcResPressure(computeIdealizedProcResPressure(
298
+ STI.getSchedModel(), NonRedundantWriteProcRes)) {}
299
+
300
+ void Analysis::SchedClassCluster::addPoint (
301
+ size_t PointId, const InstructionBenchmarkClustering &Clustering) {
302
+ PointIds.push_back (PointId);
303
+ const auto &Point = Clustering.getPoints ()[PointId];
304
+ if (ClusterId.isUndef ()) {
305
+ ClusterId = Clustering.getClusterIdForPoint (PointId);
306
+ Representative.resize (Point .Measurements .size ());
307
+ }
308
+ for (size_t I = 0 , E = Point .Measurements .size (); I < E; ++I) {
309
+ Representative[I].push (Point .Measurements [I]);
310
+ }
311
+ assert (ClusterId == Clustering.getClusterIdForPoint (PointId));
312
+ }
313
+
314
+ bool Analysis::SchedClassCluster::measurementsMatch (
315
+ const llvm::MCSubtargetInfo &STI, const SchedClass &SC,
316
+ const InstructionBenchmarkClustering &Clustering) const {
317
+ const size_t NumMeasurements = Representative.size ();
318
+ std::vector<BenchmarkMeasure> ClusterCenterPoint (NumMeasurements);
319
+ std::vector<BenchmarkMeasure> SchedClassPoint (NumMeasurements);
320
+ // Latency case.
321
+ assert (!Clustering.getPoints ().empty ());
322
+ const std::string &Mode = Clustering.getPoints ()[0 ].Key .Mode ;
323
+ if (Mode == " latency" ) { // FIXME: use an enum.
324
+ if (NumMeasurements != 1 ) {
325
+ llvm::errs ()
326
+ << " invalid number of measurements in latency mode: expected 1, got "
327
+ << NumMeasurements << " \n " ;
328
+ return false ;
329
+ }
330
+ // Find the latency.
331
+ SchedClassPoint[0 ].Value = 0.0 ;
332
+ for (unsigned I = 0 ; I < SC.SCDesc .NumWriteLatencyEntries ; ++I) {
333
+ const llvm::MCWriteLatencyEntry *const WLE =
334
+ STI.getWriteLatencyEntry (&SC.SCDesc , I);
335
+ SchedClassPoint[0 ].Value =
336
+ std::max<double >(SchedClassPoint[0 ].Value , WLE->Cycles );
337
+ }
338
+ ClusterCenterPoint[0 ].Value = Representative[0 ].avg ();
339
+ } else if (Mode == " uops" ) {
340
+ for (int I = 0 , E = Representative.size (); I < E; ++I) {
341
+ // Find the pressure on ProcResIdx `Key`.
342
+ uint16_t ProcResIdx = 0 ;
343
+ if (!llvm::to_integer (Representative[I].key (), ProcResIdx, 10 )) {
344
+ llvm::errs () << " expected ProcResIdx key, got "
345
+ << Representative[I].key () << " \n " ;
346
+ return false ;
347
+ }
348
+ const auto ProcResPressureIt =
349
+ std::find_if (SC.IdealizedProcResPressure .begin (),
350
+ SC.IdealizedProcResPressure .end (),
351
+ [ProcResIdx](const std::pair<uint16_t , float > &WPR) {
352
+ return WPR.first == ProcResIdx;
353
+ });
354
+ SchedClassPoint[I].Value =
355
+ ProcResPressureIt == SC.IdealizedProcResPressure .end ()
356
+ ? 0.0
357
+ : ProcResPressureIt->second ;
358
+ ClusterCenterPoint[I].Value = Representative[I].avg ();
359
+ }
360
+ } else {
361
+ llvm::errs () << " unimplemented measurement matching for mode ''" << Mode
362
+ << " ''\n " ;
363
+ return false ;
364
+ }
365
+ return Clustering.isNeighbour (ClusterCenterPoint, SchedClassPoint);
366
+ }
367
+
368
+ void Analysis::printSchedClassDescHtml (const SchedClass &SC,
304
369
llvm::raw_ostream &OS) const {
305
370
OS << " <table class=\" sched-class-desc\" >" ;
306
371
OS << " <tr><th>Valid</th><th>Variant</th><th>uOps</th><th>Latency</"
307
372
" th><th>WriteProcRes</th><th title=\" This is the idealized unit "
308
373
" resource (port) pressure assuming ideal distribution\" >Idealized "
309
374
" Resource Pressure</th></tr>" ;
310
- if (SCDesc.isValid ()) {
375
+ if (SC. SCDesc .isValid ()) {
311
376
const auto &SM = SubtargetInfo_->getSchedModel ();
312
377
OS << " <tr><td>✔</td>" ;
313
- OS << " <td>" << (SCDesc.isVariant () ? " ✔" : " ✕" ) << " </td>" ;
314
- OS << " <td>" << SCDesc.NumMicroOps << " </td>" ;
378
+ OS << " <td>" << (SC.SCDesc .isVariant () ? " ✔" : " ✕" )
379
+ << " </td>" ;
380
+ OS << " <td>" << SC.SCDesc .NumMicroOps << " </td>" ;
315
381
// Latencies.
316
382
OS << " <td><ul>" ;
317
- for (int I = 0 , E = SCDesc.NumWriteLatencyEntries ; I < E; ++I) {
383
+ for (int I = 0 , E = SC. SCDesc .NumWriteLatencyEntries ; I < E; ++I) {
318
384
const auto *const Entry =
319
- SubtargetInfo_->getWriteLatencyEntry (&SCDesc, I);
385
+ SubtargetInfo_->getWriteLatencyEntry (&SC. SCDesc , I);
320
386
OS << " <li>" << Entry->Cycles ;
321
- if (SCDesc.NumWriteLatencyEntries > 1 ) {
387
+ if (SC. SCDesc .NumWriteLatencyEntries > 1 ) {
322
388
// Disambiguate if more than 1 latency.
323
389
OS << " (WriteResourceID " << Entry->WriteResourceID << " )" ;
324
390
}
@@ -327,8 +393,7 @@ void Analysis::printSchedClassDescHtml(const llvm::MCSchedClassDesc &SCDesc,
327
393
OS << " </ul></td>" ;
328
394
// WriteProcRes.
329
395
OS << " <td><ul>" ;
330
- const auto ProcRes = getNonRedundantWriteProcRes (SCDesc, *SubtargetInfo_);
331
- for (const auto &WPR : ProcRes) {
396
+ for (const auto &WPR : SC.NonRedundantWriteProcRes ) {
332
397
OS << " <li><span class=\" mono\" >" ;
333
398
writeEscaped<kEscapeHtml >(OS,
334
399
SM.getProcResource (WPR.ProcResourceIdx )->Name );
@@ -337,7 +402,7 @@ void Analysis::printSchedClassDescHtml(const llvm::MCSchedClassDesc &SCDesc,
337
402
OS << " </ul></td>" ;
338
403
// Idealized port pressure.
339
404
OS << " <td><ul>" ;
340
- for (const auto &Pressure : computeIdealizedProcResPressure (SM, ProcRes) ) {
405
+ for (const auto &Pressure : SC. IdealizedProcResPressure ) {
341
406
OS << " <li><span class=\" mono\" >" ;
342
407
writeEscaped<kEscapeHtml >(OS, SubtargetInfo_->getSchedModel ()
343
408
.getProcResource (Pressure.first )
@@ -401,59 +466,87 @@ table.sched-class-desc td {
401
466
span.mono {
402
467
font-family: monospace;
403
468
}
404
- span.minmax {
405
- color: #888;
406
- }
407
469
td.measurement {
408
470
text-align: center;
409
471
}
472
+ tr.good-cluster td.measurement {
473
+ color: #292
474
+ }
475
+ tr.bad-cluster td.measurement {
476
+ color: #922
477
+ }
478
+ tr.good-cluster td.measurement span.minmax {
479
+ color: #888;
480
+ }
481
+ tr.bad-cluster td.measurement span.minmax {
482
+ color: #888;
483
+ }
410
484
</style>
411
485
</head>
412
486
)" ;
413
487
414
488
template <>
415
489
llvm::Error Analysis::run<Analysis::PrintSchedClassInconsistencies>(
416
490
llvm::raw_ostream &OS) const {
491
+ const auto &FirstPoint = Clustering_.getPoints ()[0 ];
417
492
// Print the header.
418
493
OS << " <!DOCTYPE html><html>" << kHtmlHead << " <body>" ;
419
494
OS << " <h1><span class=\" mono\" >llvm-exegesis</span> Analysis Results</h1>" ;
420
495
OS << " <h3>Triple: <span class=\" mono\" >" ;
421
- writeEscaped<kEscapeHtml >(OS, Clustering_. getPoints ()[ 0 ] .LLVMTriple );
496
+ writeEscaped<kEscapeHtml >(OS, FirstPoint .LLVMTriple );
422
497
OS << " </span></h3><h3>Cpu: <span class=\" mono\" >" ;
423
- writeEscaped<kEscapeHtml >(OS, Clustering_. getPoints ()[ 0 ] .CpuName );
498
+ writeEscaped<kEscapeHtml >(OS, FirstPoint .CpuName );
424
499
OS << " </span></h3>" ;
425
500
426
- // All the points in a scheduling class should be in the same cluster.
427
- // Print any scheduling class for which this is not the case.
428
501
for (const auto &SchedClassAndPoints : makePointsPerSchedClass ()) {
429
- std::unordered_set<size_t > ClustersForSchedClass;
430
- for (const size_t PointId : SchedClassAndPoints.second ) {
502
+ const auto SchedClassId = SchedClassAndPoints.first ;
503
+ const std::vector<size_t > &SchedClassPoints = SchedClassAndPoints.second ;
504
+ const auto &SchedModel = SubtargetInfo_->getSchedModel ();
505
+ const llvm::MCSchedClassDesc *const SCDesc =
506
+ SchedModel.getSchedClassDesc (SchedClassId);
507
+ if (!SCDesc)
508
+ continue ;
509
+ const SchedClass SC (*SCDesc, *SubtargetInfo_);
510
+
511
+ // Bucket sched class points into sched class clusters.
512
+ std::vector<SchedClassCluster> SchedClassClusters;
513
+ for (const size_t PointId : SchedClassPoints) {
431
514
const auto &ClusterId = Clustering_.getClusterIdForPoint (PointId);
432
515
if (!ClusterId.isValid ())
433
- continue ; // Ignore noise and errors.
434
- ClustersForSchedClass.insert (ClusterId.getId ());
516
+ continue ; // Ignore noise and errors. FIXME: take noise into account ?
517
+ auto SchedClassClusterIt =
518
+ std::find_if (SchedClassClusters.begin (), SchedClassClusters.end (),
519
+ [ClusterId](const SchedClassCluster &C) {
520
+ return C.id () == ClusterId;
521
+ });
522
+ if (SchedClassClusterIt == SchedClassClusters.end ()) {
523
+ SchedClassClusters.emplace_back ();
524
+ SchedClassClusterIt = std::prev (SchedClassClusters.end ());
525
+ }
526
+ SchedClassClusterIt->addPoint (PointId, Clustering_);
435
527
}
436
- if (ClustersForSchedClass.size () <= 1 )
528
+
529
+ // Print any scheduling class that has at least one cluster that does not
530
+ // match the checked-in data.
531
+ if (std::all_of (SchedClassClusters.begin (), SchedClassClusters.end (),
532
+ [this , &SC](const SchedClassCluster &C) {
533
+ return C.measurementsMatch (*SubtargetInfo_, SC,
534
+ Clustering_);
535
+ }))
437
536
continue ; // Nothing weird.
438
537
439
- const auto &SchedModel = SubtargetInfo_->getSchedModel ();
440
- const llvm::MCSchedClassDesc *const SCDesc =
441
- SchedModel.getSchedClassDesc (SchedClassAndPoints.first );
442
- if (!SCDesc)
443
- continue ;
444
538
OS << " <div class=\" inconsistency\" ><p>Sched Class <span "
445
539
" class=\" sched-class-name\" >" ;
446
540
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
447
541
writeEscaped<kEscapeHtml >(OS, SCDesc->Name );
448
542
#else
449
- OS << SchedClassAndPoints. first ;
543
+ OS << SchedClassId ;
450
544
#endif
451
- OS << " </span> contains instructions with distinct performance "
452
- " characteristics, falling into "
453
- << ClustersForSchedClass.size () << " clusters:</p>" ;
454
- printSchedClassClustersHtml (SchedClassAndPoints.second , OS);
455
- OS << " <p>llvm data:</p>" ;
456
- printSchedClassDescHtml (*SCDesc, OS);
545
+ OS << " </span> contains instructions whose performance characteristics do"
546
+ " not match that of LLVM:</p>" ;
547
+ printSchedClassClustersHtml (SchedClassClusters, SC, OS);
548
+ OS << " <p>llvm SchedModel data:</p>" ;
549
+ printSchedClassDescHtml (SC, OS);
457
550
OS << " </div>" ;
458
551
}
459
552
0 commit comments