Index: llvm/include/llvm/CodeGen/MachineScheduler.h
===================================================================
--- llvm/include/llvm/CodeGen/MachineScheduler.h
+++ llvm/include/llvm/CodeGen/MachineScheduler.h
@@ -681,6 +681,10 @@
   // scheduled instruction.
   SmallVector<unsigned, 16> ReservedCycles;
 
+  // For each PIdx, stores the index of the first ReservedCycles entry that
+  // belongs to it; the resource's NumUnits entries are contiguous from there.
+  SmallVector<unsigned, 16> ProcessorResourceList;
+
 #ifndef NDEBUG
   // Remember the greatest possible stall as an upper bound on the number of
   // times we should retry the pending queue because of a hazard.
@@ -755,7 +759,15 @@
   /// cycle.
   unsigned getLatencyStallCycles(SUnit *SU);
 
-  unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
+  /// Next cycle at which the unit at \p InstanceIndex in ReservedCycles can
+  /// be scheduled.
+  unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
+                                          unsigned Cycles);
+
+  /// Next cycle at which some unit of resource \p PIdx can be scheduled,
+  /// paired with the ReservedCycles index of that unit.
+  std::pair<unsigned, unsigned> getNextResourceCycle(unsigned PIdx,
+                                                     unsigned Cycles);
 
   bool checkHazard(SUnit *SU);
 
Index: llvm/include/llvm/CodeGen/TargetSchedule.h
===================================================================
--- llvm/include/llvm/CodeGen/TargetSchedule.h
+++ llvm/include/llvm/CodeGen/TargetSchedule.h
@@ -115,6 +115,11 @@
     return SchedModel.getProcResource(PIdx);
   }
 
+  /// Get the processor resource table.
+  const MCProcResourceDesc *getProcResourceTable() const {
+    return SchedModel.ProcResourceTable;
+  }
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   const char *getResourceName(unsigned PIdx) const {
     if (!PIdx)
Index: llvm/lib/CodeGen/MachineScheduler.cpp
===================================================================
--- llvm/lib/CodeGen/MachineScheduler.cpp
+++ llvm/lib/CodeGen/MachineScheduler.cpp
@@ -1882,6 +1882,7 @@
   ZoneCritResIdx = 0;
   IsResourceLimited = false;
   ReservedCycles.clear();
+  ProcessorResourceList.clear();
 #ifndef NDEBUG
   // Track the maximum number of stall cycles that could arise either from the
   // latency of a DAG edge or the number of cycles that a processor resource is
@@ -1920,8 +1921,20 @@
   SchedModel = smodel;
   Rem = rem;
   if (SchedModel->hasInstrSchedModel()) {
-    ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
-    ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
+    const MCProcResourceDesc *Resources = SchedModel->getProcResourceTable();
+    unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
+    ProcessorResourceList.resize(ResourceCount);
+    ExecutedResCounts.resize(ResourceCount);
+    unsigned NumUnits = 0;
+
+    // Give every unit of every resource its own ReservedCycles slot, and
+    // remember where each resource's run of slots begins.
+    for (unsigned i = 0; i < ResourceCount; ++i) {
+      ProcessorResourceList[i] = NumUnits;
+      NumUnits += Resources[i].NumUnits;
+    }
+
+    ReservedCycles.resize(NumUnits, InvalidCycle);
   }
 }
 
@@ -1942,11 +1955,11 @@
   return 0;
 }
 
-/// Compute the next cycle at which the given processor resource can be
-/// scheduled.
+/// Compute the next cycle at which the processor resource unit stored at
+/// \p InstanceIndex in ReservedCycles can be scheduled.
 unsigned SchedBoundary::
-getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
-  unsigned NextUnreserved = ReservedCycles[PIdx];
+getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned Cycles) {
+  unsigned NextUnreserved = ReservedCycles[InstanceIndex];
   // If this resource has never been used, always return cycle zero.
   if (NextUnreserved == InvalidCycle)
     return 0;
@@ -1956,6 +1969,30 @@
   return NextUnreserved;
 }
 
+/// Compute the next cycle at which the given processor resource can be
+/// scheduled. Every unit of the resource is examined and the one that becomes
+/// free earliest is chosen (the lowest-indexed unit on a tie).
+/// Returns (NextCycle, index of the chosen unit in ReservedCycles).
+std::pair<unsigned, unsigned> SchedBoundary::
+getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+  unsigned MinNextUnreserved = InvalidCycle;
+  unsigned InstanceID = 0;
+  unsigned StartIndex = ProcessorResourceList[PIdx];
+  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
+  assert(NumberOfInstances > 0 &&
+         "Cannot have zero instances of a ProcResource");
+
+  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
+       ++I) {
+    unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
+    if (MinNextUnreserved > NextUnreserved) {
+      InstanceID = I;
+      MinNextUnreserved = NextUnreserved;
+    }
+  }
+  return std::make_pair(MinNextUnreserved, InstanceID);
+}
+
 /// Does this SU have a hazard within the current instruction group.
 ///
 /// The scheduler supports two modes of hazard recognition. The first is the
@@ -1996,8 +2033,9 @@
           make_range(SchedModel->getWriteProcResBegin(SC),
                      SchedModel->getWriteProcResEnd(SC))) {
       unsigned ResIdx = PE.ProcResourceIdx;
       unsigned Cycles = PE.Cycles;
-      unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
+      unsigned NRCycle, InstanceID;
+      std::tie(NRCycle, InstanceID) = getNextResourceCycle(ResIdx, Cycles);
       if (NRCycle > CurrCycle) {
 #ifndef NDEBUG
         MaxObservedStall = std::max(Cycles, MaxObservedStall);
@@ -2159,10 +2197,13 @@
                       << "c\n");
   }
   // For reserved resources, record the highest cycle using the resource.
-  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+  unsigned NextAvailable, InstanceID;
+  std::tie(NextAvailable, InstanceID) = getNextResourceCycle(PIdx, Cycles);
   if (NextAvailable > CurrCycle) {
     LLVM_DEBUG(dbgs() << " Resource conflict: "
                       << SchedModel->getProcResource(PIdx)->Name
+                      << " instance "
+                      << InstanceID - ProcessorResourceList[PIdx]
                       << " reserved until @" << NextAvailable << "\n");
   }
   return NextAvailable;
@@ -2250,12 +2291,16 @@
              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
         unsigned PIdx = PI->ProcResourceIdx;
         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
+          // Claim the unit of this resource that becomes free earliest.
+          unsigned ReservedUntil, InstanceIndex;
+          std::tie(ReservedUntil, InstanceIndex) =
+              getNextResourceCycle(PIdx, 0);
           if (isTop()) {
-            ReservedCycles[PIdx] =
-              std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
+            ReservedCycles[InstanceIndex] =
+                std::max(ReservedUntil, NextCycle + PI->Cycles);
           }
           else
-            ReservedCycles[PIdx] = NextCycle;
+            ReservedCycles[InstanceIndex] = NextCycle;
         }
       }
     }
Index: llvm/test/CodeGen/AArch64/misched-fusion-aes.ll
===================================================================
--- llvm/test/CodeGen/AArch64/misched-fusion-aes.ll
+++ llvm/test/CodeGen/AArch64/misched-fusion-aes.ll
@@ -205,7 +205,9 @@
 
 ; CHECK-LABEL: aes_load_store:
 ; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
-; CHECK-NEXT: aesmc [[VA]], [[VA]]
+; aese and aesmc are described to share a unit, hence won't be scheduled on the
+; same cycle and the scheduler can find another instruction to place in between.
+; CHECK: aesmc [[VA]], [[VA]]
 ; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
 ; CHECK-NEXT: aesmc [[VB]], [[VB]]
 ; CHECK-NOT: aesmc
Index: llvm/test/CodeGen/ARM/proc-resource-sched.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/ARM/proc-resource-sched.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-r52 -debug-only=machine-scheduler %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-R52
+; REQUIRES: asserts
+
+; source_filename = "sched-2.c"
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define dso_local i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr {
+entry:
+  %add = add nsw i32 %b, %a
+  %add1 = add nsw i32 %d, %c
+  %div = sdiv i32 %add, %add1
+  ret i32 %div
+}
+
+; Cortex-R52 model describes it as dual-issue with two integer ALUs.
+; It should be able to issue the two additions in the same cycle.
+; CHECK-R52: MI Scheduling
+; CHECK-R52: Cycle: 14
+; CHECK-R52: Scheduling SU(5) %5:gpr = nsw ADDrr %3:gpr, %2:gpr, 14, $noreg, $noreg
+; CHECK-R52: Scheduling SU(4) %4:gpr = nsw ADDrr %1:gpr, %0:gpr, 14, $noreg, $noreg
+; CHECK-R52: Cycle: 15