diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -674,6 +674,9 @@ // it. SmallVector ReservedCyclesIndex; + // Per PIdx: bitmask of subunit resource IDs (set for unbuffered groups only). + SmallVector ResourceGroupSubUnitMasks; + #ifndef NDEBUG // Remember the greatest possible stall as an upper bound on the number of // times we should retry the pending queue because of a hazard. @@ -751,7 +754,8 @@ unsigned getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned Cycles); - std::pair getNextResourceCycle(unsigned PIdx, + std::pair getNextResourceCycle(const MCSchedClassDesc *SC, + unsigned PIdx, unsigned Cycles); bool checkHazard(SUnit *SU); @@ -775,7 +779,8 @@ void incExecutedResources(unsigned PIdx, unsigned Count); - unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); + unsigned countResource(const MCSchedClassDesc *SC, unsigned PIdx, + unsigned Cycles, unsigned ReadyCycle); void bumpNode(SUnit *SU); diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -2004,6 +2004,7 @@ IsResourceLimited = false; ReservedCycles.clear(); ReservedCyclesIndex.clear(); + ResourceGroupSubUnitMasks.clear(); #ifndef NDEBUG // Track the maximum number of stall cycles that could arise either from the // latency of a DAG edge or the number of cycles that a processor resource is @@ -2045,11 +2046,19 @@ unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); ReservedCyclesIndex.resize(ResourceCount); ExecutedResCounts.resize(ResourceCount); + ResourceGroupSubUnitMasks.resize(ResourceCount, 0); unsigned NumUnits = 0; for (unsigned i = 0; i < ResourceCount; ++i) { ReservedCyclesIndex[i] = NumUnits; NumUnits += SchedModel->getProcResource(i)->NumUnits; + if 
(SchedModel->getProcResource(i)->SubUnitsIdxBegin && + !SchedModel->getProcResource(i)->BufferSize) { + auto SubUnits = SchedModel->getProcResource(i)->SubUnitsIdxBegin; + for (unsigned U = 0, UE = SchedModel->getProcResource(i)->NumUnits; + U != UE; ++U) + ResourceGroupSubUnitMasks[i] |= 1ULL << SubUnits[U]; + } } ReservedCycles.resize(NumUnits, InvalidCycle); @@ -2091,7 +2100,9 @@ /// scheduled. Returns the next cycle and the index of the processor resource /// instance in the reserved cycles vector. std::pair -SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) { +SchedBoundary::getNextResourceCycle(const MCSchedClassDesc *SC, unsigned PIdx, + unsigned Cycles) { + unsigned MinNextUnreserved = InvalidCycle; unsigned InstanceIdx = 0; unsigned StartIndex = ReservedCyclesIndex[PIdx]; @@ -2099,6 +2110,36 @@ assert(NumberOfInstances > 0 && "Cannot have zero instances of a ProcResource"); + bool IsUnbufferedGroup = + SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin && + !SchedModel->getProcResource(PIdx)->BufferSize; + if (IsUnbufferedGroup) { + // If the instruction uses any subunit, report the group free at cycle 0; + // otherwise, choose the first available instance from among the subunits. + // Specifications which assign cycles to both the subunits and the group or + // which use an unbuffered group with buffered subunits will appear to + // schedule strangely. In the first case, the additional cycles for the + // group will be ignored. In the second, the group will be ignored + // entirely. 
+ for (const MCWriteProcResEntry &PE : + make_range(SchedModel->getWriteProcResBegin(SC), + SchedModel->getWriteProcResEnd(SC))) + if (ResourceGroupSubUnitMasks[PIdx] & (1ULL << PE.ProcResourceIdx)) + return std::make_pair(0u, StartIndex); + + auto SubUnits = SchedModel->getProcResource(PIdx)->SubUnitsIdxBegin; + for (unsigned I = 0, End = NumberOfInstances; I < End; ++I) { + unsigned NextUnreserved, NextInstanceIdx; + std::tie(NextUnreserved, NextInstanceIdx) = + getNextResourceCycle(SC, SubUnits[I], Cycles); + if (MinNextUnreserved > NextUnreserved) { + InstanceIdx = NextInstanceIdx; + MinNextUnreserved = NextUnreserved; + } + } + return std::make_pair(MinNextUnreserved, InstanceIdx); + } + for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End; ++I) { unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles); @@ -2152,7 +2193,7 @@ unsigned ResIdx = PE.ProcResourceIdx; unsigned Cycles = PE.Cycles; unsigned NRCycle, InstanceIdx; - std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles); + std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(SC, ResIdx, Cycles); if (NRCycle > CurrCycle) { #ifndef NDEBUG MaxObservedStall = std::max(Cycles, MaxObservedStall); @@ -2302,8 +2343,8 @@ /// /// \return the next cycle at which the instruction may execute without /// oversubscribing resources. -unsigned SchedBoundary:: -countResource(unsigned PIdx, unsigned Cycles, unsigned NextCycle) { +unsigned SchedBoundary::countResource(const MCSchedClassDesc *SC, unsigned PIdx, + unsigned Cycles, unsigned NextCycle) { unsigned Factor = SchedModel->getResourceFactor(PIdx); unsigned Count = Factor * Cycles; LLVM_DEBUG(dbgs() << " " << SchedModel->getResourceName(PIdx) << " +" @@ -2325,7 +2366,7 @@ } // For reserved resources, record the highest cycle using the resource. 
unsigned NextAvailable, InstanceIdx; - std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles); + std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(SC, PIdx, Cycles); if (NextAvailable > CurrCycle) { LLVM_DEBUG(dbgs() << " Resource conflict: " << SchedModel->getResourceName(PIdx) @@ -2405,7 +2446,7 @@ PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { unsigned RCycle = - countResource(PI->ProcResourceIdx, PI->Cycles, NextCycle); + countResource(SC, PI->ProcResourceIdx, PI->Cycles, NextCycle); if (RCycle > NextCycle) NextCycle = RCycle; } @@ -2420,7 +2461,8 @@ unsigned PIdx = PI->ProcResourceIdx; if (SchedModel->getProcResource(PIdx)->BufferSize == 0) { unsigned ReservedUntil, InstanceIdx; - std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0); + std::tie(ReservedUntil, InstanceIdx) = + getNextResourceCycle(SC, PIdx, 0); if (isTop()) { ReservedCycles[InstanceIdx] = std::max(ReservedUntil, NextCycle + PI->Cycles); diff --git a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir --- a/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir +++ b/llvm/test/CodeGen/ARM/cortex-m7-wideops.mir @@ -22,9 +22,9 @@ ; CHECK-LABEL: name: test_groups ; CHECK: liveins: $d0, $r0, $r1, $r2, $r3, $r4 ; CHECK: renamable $d0 = VADDD killed renamable $d0, renamable $d0, 14 /* CC::al */, $noreg + ; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg ; CHECK: renamable $s2 = VLDRS killed renamable $r0, 0, 14 /* CC::al */, $noreg ; CHECK: VSTRS killed renamable $s2, killed renamable $r1, 0, 14 /* CC::al */, $noreg - ; CHECK: renamable $r3 = t2ADDrr killed renamable $r3, renamable $r3, 14 /* CC::al */, $noreg, $noreg ; CHECK: t2STRi12 killed renamable $r3, killed renamable $r2, 0, 14 /* CC::al */, $noreg ; CHECK: renamable $r4 = t2ADDrr killed renamable $r4, renamable $r4, 14 /* CC::al */, $noreg, $noreg ; CHECK: tBX_RET 14 
/* CC::al */, $noreg, implicit killed $d0