Index: llvm/trunk/include/llvm/CodeGen/MachineScheduler.h
===================================================================
--- llvm/trunk/include/llvm/CodeGen/MachineScheduler.h
+++ llvm/trunk/include/llvm/CodeGen/MachineScheduler.h
@@ -666,6 +666,10 @@
   // scheduled instruction.
   SmallVector<unsigned, 16> ReservedCycles;
 
+  // For each PIdx, stores first index into ReservedCycles that corresponds to
+  // it.
+  SmallVector<unsigned, 16> ReservedCyclesIndex;
+
 #ifndef NDEBUG
   // Remember the greatest possible stall as an upper bound on the number of
   // times we should retry the pending queue because of a hazard.
@@ -740,7 +744,16 @@
   /// cycle.
   unsigned getLatencyStallCycles(SUnit *SU);
 
-  unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
+  /// Compute the next cycle at which the given processor resource unit can be
+  /// scheduled. \p InstanceIdx is an index into ReservedCycles.
+  unsigned getNextResourceCycleByInstance(unsigned InstanceIdx,
+                                          unsigned Cycles);
+
+  /// Compute the next cycle at which the given processor resource can be
+  /// scheduled. Returns the next cycle and the index of the processor
+  /// resource instance in the reserved cycles vector.
+  std::pair<unsigned, unsigned> getNextResourceCycle(unsigned PIdx,
+                                                     unsigned Cycles);
 
   bool checkHazard(SUnit *SU);
 
Index: llvm/trunk/lib/CodeGen/MachineScheduler.cpp
===================================================================
--- llvm/trunk/lib/CodeGen/MachineScheduler.cpp
+++ llvm/trunk/lib/CodeGen/MachineScheduler.cpp
@@ -1863,6 +1863,7 @@
   ZoneCritResIdx = 0;
   IsResourceLimited = false;
   ReservedCycles.clear();
+  ReservedCyclesIndex.clear();
 #ifndef NDEBUG
   // Track the maximum number of stall cycles that could arise either from the
   // latency of a DAG edge or the number of cycles that a processor resource is
@@ -1901,8 +1902,17 @@
   SchedModel = smodel;
   Rem = rem;
   if (SchedModel->hasInstrSchedModel()) {
-    ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
-    ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
+    unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
+    ReservedCyclesIndex.resize(ResourceCount);
+    ExecutedResCounts.resize(ResourceCount);
+    unsigned NumUnits = 0;
+
+    for (unsigned i = 0; i < ResourceCount; ++i) {
+      ReservedCyclesIndex[i] = NumUnits;
+      NumUnits += SchedModel->getProcResource(i)->NumUnits;
+    }
+
+    ReservedCycles.resize(NumUnits, InvalidCycle);
   }
 }
 
@@ -1923,11 +1933,11 @@
   return 0;
 }
 
-/// Compute the next cycle at which the given processor resource can be
-/// scheduled.
-unsigned SchedBoundary::
-getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
-  unsigned NextUnreserved = ReservedCycles[PIdx];
+/// Compute the next cycle at which the given processor resource unit
+/// can be scheduled.
+unsigned SchedBoundary::getNextResourceCycleByInstance(unsigned InstanceIdx,
+                                                       unsigned Cycles) {
+  unsigned NextUnreserved = ReservedCycles[InstanceIdx];
   // If this resource has never been used, always return cycle zero.
   if (NextUnreserved == InvalidCycle)
     return 0;
@@ -1937,6 +1947,29 @@
   return NextUnreserved;
 }
 
+/// Compute the next cycle at which the given processor resource can be
+/// scheduled.  Returns the next cycle and the index of the processor resource
+/// instance in the reserved cycles vector.
+std::pair<unsigned, unsigned>
+SchedBoundary::getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
+  unsigned MinNextUnreserved = InvalidCycle;
+  unsigned InstanceIdx = 0;
+  unsigned StartIndex = ReservedCyclesIndex[PIdx];
+  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
+  assert(NumberOfInstances > 0 &&
+         "Cannot have zero instances of a ProcResource");
+
+  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
+       ++I) {
+    unsigned NextUnreserved = getNextResourceCycleByInstance(I, Cycles);
+    if (MinNextUnreserved > NextUnreserved) {
+      InstanceIdx = I;
+      MinNextUnreserved = NextUnreserved;
+    }
+  }
+  return std::make_pair(MinNextUnreserved, InstanceIdx);
+}
+
 /// Does this SU have a hazard within the current instruction group.
 ///
 /// The scheduler supports two modes of hazard recognition. The first is the
@@ -1978,14 +2011,16 @@
                     SchedModel->getWriteProcResEnd(SC))) {
       unsigned ResIdx = PE.ProcResourceIdx;
       unsigned Cycles = PE.Cycles;
-      unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
+      unsigned NRCycle, InstanceIdx;
+      std::tie(NRCycle, InstanceIdx) = getNextResourceCycle(ResIdx, Cycles);
       if (NRCycle > CurrCycle) {
 #ifndef NDEBUG
         MaxObservedStall = std::max(Cycles, MaxObservedStall);
 #endif
         LLVM_DEBUG(dbgs() << " SU(" << SU->NodeNum << ") "
-                          << SchedModel->getResourceName(ResIdx) << "="
-                          << NRCycle << "c\n");
+                          << SchedModel->getResourceName(ResIdx)
+                          << '[' << InstanceIdx - ReservedCyclesIndex[ResIdx] << ']'
+                          << "=" << NRCycle << "c\n");
         return true;
       }
     }
@@ -2140,10 +2175,12 @@
                       << "c\n");
   }
   // For reserved resources, record the highest cycle using the resource.
-  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+  unsigned NextAvailable, InstanceIdx;
+  std::tie(NextAvailable, InstanceIdx) = getNextResourceCycle(PIdx, Cycles);
   if (NextAvailable > CurrCycle) {
     LLVM_DEBUG(dbgs() << " Resource conflict: "
-                      << SchedModel->getProcResource(PIdx)->Name
+                      << SchedModel->getResourceName(PIdx)
+                      << '[' << InstanceIdx - ReservedCyclesIndex[PIdx] << ']'
                       << " reserved until @" << NextAvailable << "\n");
   }
   return NextAvailable;
@@ -2233,12 +2270,13 @@
              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
         unsigned PIdx = PI->ProcResourceIdx;
         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
+          unsigned ReservedUntil, InstanceIdx;
+          std::tie(ReservedUntil, InstanceIdx) = getNextResourceCycle(PIdx, 0);
           if (isTop()) {
-            ReservedCycles[PIdx] =
-              std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
-          }
-          else
-            ReservedCycles[PIdx] = NextCycle;
+            ReservedCycles[InstanceIdx] =
+                std::max(ReservedUntil, NextCycle + PI->Cycles);
+          } else
+            ReservedCycles[InstanceIdx] = NextCycle;
         }
       }
     }
Index: llvm/trunk/test/CodeGen/AArch64/misched-fusion-aes.ll
===================================================================
--- llvm/trunk/test/CodeGen/AArch64/misched-fusion-aes.ll
+++ llvm/trunk/test/CodeGen/AArch64/misched-fusion-aes.ll
@@ -205,7 +205,9 @@
 
 ; CHECK-LABEL: aes_load_store:
 ; CHECK: aese [[VA:v[0-7].16b]], {{v[0-7].16b}}
-; CHECK-NEXT: aesmc [[VA]], [[VA]]
+; aese and aesmc are described to share a unit, hence won't be scheduled on the
+; same cycle and the scheduler can find another instruction to place in between
+; CHECK: aesmc [[VA]], [[VA]]
 ; CHECK: aese [[VB:v[0-7].16b]], {{v[0-7].16b}}
 ; CHECK-NEXT: aesmc [[VB]], [[VB]]
 ; CHECK-NOT: aesmc
Index: llvm/trunk/test/CodeGen/ARM/proc-resource-sched.ll
===================================================================
--- llvm/trunk/test/CodeGen/ARM/proc-resource-sched.ll
+++ llvm/trunk/test/CodeGen/ARM/proc-resource-sched.ll
@@ -0,0 +1,21 @@
+; RUN: llc -mtriple=arm-eabi -mcpu=cortex-r52 -debug-only=machine-scheduler %s -o - 2>&1 | FileCheck %s --check-prefix=CHECK-R52
+; REQUIRES: asserts
+
+; source_filename = "sched-2.c"
+target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
+
+define dso_local i32 @f(i32 %a, i32 %b, i32 %c, i32 %d) local_unnamed_addr {
+entry:
+  %add = add nsw i32 %b, %a
+  %add1 = add nsw i32 %d, %c
+  %div = sdiv i32 %add, %add1
+  ret i32 %div
+}
+
+; Cortex-R52 model describes it as dual-issue with two integer ALUs
+; It should be able to issue the two additions in the same cycle.
+; CHECK-R52: MI Scheduling
+; CHECK-R52: Cycle: 14
+; CHECK-R52: Scheduling SU(5) %5:gpr = nsw ADDrr %3:gpr, %2:gpr, 14, $noreg, $noreg
+; CHECK-R52: Scheduling SU(4) %4:gpr = nsw ADDrr %1:gpr, %0:gpr, 14, $noreg, $noreg
+; CHECK-R52: Cycle: 15