diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -74,6 +74,7 @@ #ifndef LLVM_CODEGEN_MACHINESCHEDULER_H #define LLVM_CODEGEN_MACHINESCHEDULER_H +#include "llvm/ADT/APInt.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/STLExtras.h" @@ -674,6 +675,9 @@ // it. SmallVector ReservedCyclesIndex; + // For each PIdx, stores the resource group IDs of its subunits + SmallVector ResourceGroupSubUnitMasks; + #ifndef NDEBUG // Remember the greatest possible stall as an upper bound on the number of // times we should retry the pending queue because of a hazard. @@ -751,9 +755,15 @@ unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned Cycles); - std::pair getNextResourceCycle(unsigned PIdx, + std::pair getNextResourceCycle(const MCSchedClassDesc *SC, + unsigned PIdx, unsigned Cycles); + bool isUnbufferedGroup(unsigned PIdx) const { + return SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin && + !SchedModel->getProcResource(PIdx)->BufferSize; + } + bool checkHazard(SUnit *SU); unsigned findMaxLatency(ArrayRef ReadySUs); @@ -775,7 +785,8 @@ void incExecutedResources(unsigned PIdx, unsigned Count); - unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); + unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, + unsigned Cycles, unsigned ReadyCycle); void bumpNode(SUnit *SU); diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2004,6 +2004,7 @@ IsResourceLimited = false; ReservedCycles.clear(); ReservedCyclesIndex.clear(); + ResourceGroupSubUnitMasks.clear(); #ifndef NDEBUG // Track the maximum number of stall cycles that could arise either from the // latency of a DAG edge or the number of cycles that a processor resource is @@ -2045,11 +2046,18 @@ unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); ReservedCyclesIndex.resize(ResourceCount); ExecutedResCounts.resize(ResourceCount); + ResourceGroupSubUnitMasks.resize(ResourceCount, APInt(ResourceCount, 0)); unsigned NumUnits = 0; for (unsigned i = 0; i < ResourceCount; ++i) { ReservedCyclesIndex[i] = NumUnits; NumUnits += SchedModel->getProcResource(i)->NumUnits; + if (isUnbufferedGroup(i)) { + auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin; + for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits; + U != UE; ++U) + ResourceGroupSubUnitMasks[i].setBit(SubUnits[U]); + } } ReservedCycles.resize(NumUnits, InvalidCycle); @@ -2091,7 +2099,9 @@ /// scheduled. Returns the next cycle and the index of the processor resource /// instance in the reserved cycles vector. std::pair -SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) { +SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, + unsigned Cycles) { + unsigned MinNextUnreserved = InvalidCycle; unsigned InstanceIdx = 0; unsigned StartIndex = ReservedCyclesIndex[PIdx]; @@ -2099,6 +2109,35 @@ assert(NumberOfInstances > 0 && "Cannot have zero instances of a ProcResource"); + if (isUnbufferedGroup(PIdx)) { + // If any subunits are used by the instruction, report that the resource + // group is available at 0, effectively removing the group record from + // hazarding and basing the hazarding decisions on the subunit records. + // Otherwise, choose the first available instance from among the subunits. + // Specifications which assign cycles to both the subunits and the group or + // which use an unbuffered group with buffered subunits will appear to + // schedule strangely. In the first case, the additional cycles for the + // group will be ignored. In the second, the group will be ignored + // entirely. + for (const MCWriteProcResEntry &PE : + make_range(SchedModel->getWriteProcResBegin(SC), + SchedModel->getWriteProcResEnd(SC))) + if (ResourceGroupSubUnitMasks[PIdx][PE.ProcResourceIdx]) + return std::make_pair(0u, StartIndex); + + auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin; + for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) { + unsigned NextUnreserved, NextInstanceIdx; + std::tie(NextUnreserved, NextInstanceIdx) = + getNextResourceCycle(SC, SubUnits[I], Cycles); + if (MinNextUnreserved > NextUnreserved) { + InstanceIdx = NextInstanceIdx; + MinNextUnreserved = NextUnreserved; + } + } + return std::make_pair(MinNextUnreserved, InstanceIdx); + } + for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; ++I) { unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); @@ -2152,7 +2191,7 @@ unsigned ResIdx = PE.ProcResourceIdx; unsigned Cycles = PE.Cycles; unsigned NRCycle, InstanceIdx; - std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles); + std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles); if (NRCycle > CurrCycle) { #ifndef NDEBUG MaxObservedStall = std::max(Cycles, MaxObservedStall); @@ -2302,8 +2341,8 @@ /// /// \return the next cycle at which the instruction may execute without /// oversubscribing resources. -unsigned SchedBoundary:: -countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { +unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx, + unsigned Cycles, unsigned NextCycle) { unsigned Factor = SchedModel->getResourceFactor(PIdx); unsigned Count = Factor * Cycles; LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +" @@ -2325,7 +2364,7 @@ } // For reserved resources, record the highest cycle using the resource. unsigned NextAvailable, InstanceIdx; - std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles); + std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles); if (NextAvailable > CurrCycle) { LLVM_DEBUG(dbgs() << " Resource conflict: " << SchedModel->getResourceName(PIdx) @@ -2405,7 +2444,7 @@ PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned RCycle = - countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle); + countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle); if (RCycle > NextCycle) NextCycle = RCycle; } @@ -2420,7 +2459,8 @@ unsigned PIdx = PI->ProcResourceIdx; if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { unsigned ReservedUntil, InstanceIdx; - std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0); + std::tie(ReservedUntil, InstanceIdx) = + getNextResourceCycle(SC, PIdx, 0); if (isTop()) { ReservedCycles[InstanceIdx] = std::max(ReservedUntil, NextCycle + PI->Cycles); diff --git a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir --- a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir +++ b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir @@ -22,9 +22,9 @@ ; CHECK-LABEL: name: test_groups ; CHECK: liveins: $d0, $r0, $r1, $r2, $r3, $r4 ; CHECK: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg + ; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg ; CHECK: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg - ; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg ; CHECK: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg ; CHECK: tBX_RET 14 /* CC::al */, $noreg, implicit killed $d0