Skip to content

Commit c396f09

Browse files
committedMay 10, 2019
Adjust MachineScheduler to use ProcResource counts
This fix allows the scheduler to take into account the number of instances of each ProcResource specified. Previously, a scheduler declaration of ProcResource<1> was treated identically to a declaration of ProcResource<2>. Now the hazard recognizer reports a hazard only after all of the resource instances are busy. Patch by Jackson Woodruff and Momchil Velikov. Differential Revision: https://reviews.llvm.org/D51160 llvm-svn: 360441
1 parent 6150407 commit c396f09

File tree

4 files changed

+88
-19
lines changed

4 files changed

+88
-19
lines changed
 

‎llvm/include/llvm/CodeGen/MachineScheduler.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -666,6 +666,10 @@ class SchedBoundary {
666666
// scheduled instruction.
667667
SmallVector<unsigned, 16> ReservedCycles;
668668

669+
// For each PIdx, stores the first index into ReservedCycles that corresponds to
670+
// it.
671+
SmallVector<unsigned, 16> ReservedCyclesIndex;
672+
669673
#ifndef NDEBUG
670674
// Remember the greatest possible stall as an upper bound on the number of
671675
// times we should retry the pending queue because of a hazard.
@@ -740,7 +744,11 @@ class SchedBoundary {
740744
/// cycle.
741745
unsigned getLatencyStallCycles(SUnit *SU);
742746

743-
unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
747+
unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
748+
unsigned Cycles);
749+
750+
std::pair<unsigned, unsigned> getNextResourceCycle(unsigned PIdx,
751+
unsigned Cycles);
744752

745753
bool checkHazard(SUnit *SU);
746754

‎llvm/lib/CodeGen/MachineScheduler.cpp

Lines changed: 55 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1863,6 +1863,7 @@ void SchedBoundary::reset() {
18631863
ZoneCritResIdx = 0;
18641864
IsResourceLimited = false;
18651865
ReservedCycles.clear();
1866+
ReservedCyclesIndex.clear();
18661867
#ifndef NDEBUG
18671868
// Track the maximum number of stall cycles that could arise either from the
18681869
// latency of a DAG edge or the number of cycles that a processor resource is
@@ -1901,8 +1902,17 @@ init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) {
19011902
SchedModel = smodel;
19021903
Rem = rem;
19031904
if (SchedModel->hasInstrSchedModel()) {
1904-
ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
1905-
ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
1905+
unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
1906+
ReservedCyclesIndex.resize(ResourceCount);
1907+
ExecutedResCounts.resize(ResourceCount);
1908+
unsigned NumUnits = 0;
1909+
1910+
for (unsigned i = 0; i < ResourceCount; ++i) {
1911+
ReservedCyclesIndex[i] = NumUnits;
1912+
NumUnits += SchedModel->getProcResource(i)->NumUnits;
1913+
}
1914+
1915+
ReservedCycles.resize(NumUnits, InvalidCycle);
19061916
}
19071917
}
19081918

@@ -1923,11 +1933,11 @@ unsigned SchedBoundary::getLatencyStallCycles(SUnit *SU) {
19231933
return 0;
19241934
}
19251935

1926-
/// Compute the next cycle at which the given processor resource can be
1927-
/// scheduled.
1928-
unsigned SchedBoundary::
1929-
getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1930-
unsigned NextUnreserved = ReservedCycles[PIdx];
1936+
/// Compute the next cycle at which the given processor resource unit
1937+
/// can be scheduled.
1938+
unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
1939+
unsigned Cycles) {
1940+
unsigned NextUnreserved = ReservedCycles[InstanceIdx];
19311941
// If this resource has never been used, always return cycle zero.
19321942
if (NextUnreserved == InvalidCycle)
19331943
return 0;
@@ -1937,6 +1947,29 @@ getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
19371947
return NextUnreserved;
19381948
}
19391949

1950+
/// Compute the earliest cycle at which any unit of the given processor resource
1951+
/// can be scheduled. Returns that cycle together with the index, in the
1952+
/// ReservedCycles vector, of the unit that becomes available first.
1953+
std::pair<unsigned, unsigned>
1954+
SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
1955+
unsigned MinNextUnreserved = InvalidCycle;
1956+
unsigned InstanceIdx = 0;
1957+
unsigned StartIndex = ReservedCyclesIndex[PIdx];
1958+
unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
1959+
assert(NumberOfInstances > 0 &&
1960+
"Cannot have zero instances of a ProcResource");
1961+
1962+
for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
1963+
++I) {
1964+
unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
1965+
if (MinNextUnreserved > NextUnreserved) { // strict comparison: on ties the lowest-indexed unit wins
1966+
InstanceIdx = I;
1967+
MinNextUnreserved = NextUnreserved;
1968+
}
1969+
}
1970+
return std::make_pair(MinNextUnreserved, InstanceIdx);
1971+
}
1972+
19401973
/// Does this SU have a hazard within the current instruction group.
19411974
///
19421975
/// The scheduler supports two modes of hazard recognition. The first is the
@@ -1978,14 +2011,16 @@ bool SchedBoundary::checkHazard(SUnit *SU) {
19782011
SchedModel->getWriteProcResEnd(SC))) {
19792012
unsigned ResIdx = PE.ProcResourceIdx;
19802013
unsigned Cycles = PE.Cycles;
1981-
unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
2014+
unsigned NRCycle, InstanceIdx;
2015+
std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
19822016
if (NRCycle > CurrCycle) {
19832017
#ifndef NDEBUG
19842018
MaxObservedStall = std::max(Cycles, MaxObservedStall);
19852019
#endif
19862020
LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
1987-
<< SchedModel->getResourceName(ResIdx) << "="
1988-
<< NRCycle << "c\n");
2021+
<< SchedModel->getResourceName(ResIdx)
2022+
<< '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
2023+
<< "=" << NRCycle << "c\n");
19892024
return true;
19902025
}
19912026
}
@@ -2140,10 +2175,12 @@ countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) {
21402175
<< "c\n");
21412176
}
21422177
// For reserved resources, record the highest cycle using the resource.
2143-
unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
2178+
unsigned NextAvailable, InstanceIdx;
2179+
std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
21442180
if (NextAvailable > CurrCycle) {
21452181
LLVM_DEBUG(dbgs() << " Resource conflict: "
2146-
<< SchedModel->getProcResource(PIdx)->Name
2182+
<< SchedModel->getResourceName(PIdx)
2183+
<< '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
21472184
<< " reserved until @" << NextAvailable << "\n");
21482185
}
21492186
return NextAvailable;
@@ -2233,12 +2270,13 @@ void SchedBoundary::bumpNode(SUnit *SU) {
22332270
PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
22342271
unsigned PIdx = PI->ProcResourceIdx;
22352272
if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
2273+
unsigned ReservedUntil, InstanceIdx;
2274+
std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
22362275
if (isTop()) {
2237-
ReservedCycles[PIdx] =
2238-
std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
2239-
}
2240-
else
2241-
ReservedCycles[PIdx] = NextCycle;
2276+
ReservedCycles[InstanceIdx] =
2277+
std::max(ReservedUntil, NextCycle + PI->Cycles);
2278+
} else
2279+
ReservedCycles[InstanceIdx] = NextCycle;
22422280
}
22432281
}
22442282
}

‎llvm/test/CodeGen/AArch64/misched-fusion-aes.ll

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -205,7 +205,9 @@ entry:
205205

206206
; CHECK-LABEL: aes_load_store:
207207
; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
208-
; CHECK-NEXT: aesmc [[VA]], [[VA]]
208+
; aese and aesmc are described to share a unit, hence won't be scheduled in the
209+
; same cycle and the scheduler can find another instruction to place in between
210+
; CHECK: aesmc [[VA]], [[VA]]
209211
; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
210212
; CHECK-NEXT: aesmc [[VB]], [[VB]]
211213
; CHECK-NOT: aesmc
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
; RUN: llc -mtriple=arm-eabi -mcpu=cortex-r52 -debug-only=machine-scheduler %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-R52
2+
; REQUIRES: asserts
3+
4+
; source_filename = "sched-2.c"
5+
target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
6+
7+
define dso_local i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr {
8+
entry:
9+
%add = add nsw i32 %b, %a
10+
%add1 = add nsw i32 %d, %c
11+
%div = sdiv i32 %add, %add1
12+
ret i32 %div
13+
}
14+
15+
; Cortex-R52 model describes it as dual-issue with two integer ALUs
16+
; It should be able to issue the two additions in the same cycle.
17+
; CHECK-R52: MI Scheduling
18+
; CHECK-R52: Cycle: 14
19+
; CHECK-R52: Scheduling SU(5) %5:gpr = nsw ADDrr %3:gpr, %2:gpr, 14, $noreg, $noreg
20+
; CHECK-R52: Scheduling SU(4) %4:gpr = nsw ADDrr %1:gpr, %0:gpr, 14, $noreg, $noreg
21+
; CHECK-R52: Cycle: 15

0 commit comments

Comments
 (0)
Please sign in to comment.