MachineScheduler: track reserved cycles per processor resource unit.

Summary of what the hunks below do:
  * SchedBoundary::ReservedCycles is sized to the sum of NumUnits over all
    processor resource kinds instead of one slot per resource kind.
  * SchedBoundary::ProcessorResourceList[PIdx] holds the first index into
    ReservedCycles that belongs to resource kind PIdx.
  * getNextResourceCycleByInstance() keeps the old per-slot computation;
    getNextResourceCycle() scans every unit of a resource kind and returns
    (earliest next cycle, index of the unit that provides it).
  * TargetSchedModel gains getProcResourceTable() to expose the table.

Review notes:
  * The ResCount / InstanceCount argument of getNextResourceCycle() is not
    read by the function body; the unit count is queried again from the
    scheduling model via getProcResource(PIdx)->NumUnits.
  * The new test relies on --debug output, so it is gated on
    "REQUIRES: asserts".
  * Leading whitespace of the hunk lines was reconstructed; apply with
    "patch -l" if context matching fails on whitespace.

Index: include/llvm/CodeGen/MachineScheduler.h
===================================================================
--- include/llvm/CodeGen/MachineScheduler.h
+++ include/llvm/CodeGen/MachineScheduler.h
@@ -675,6 +675,10 @@
   // scheduled instruction.
   SmallVector<unsigned, 16> ReservedCycles;
 
+  // For each PIdx, stores first index into ReservedCycles that corresponds to
+  // it.
+  SmallVector<unsigned, 16> ProcessorResourceList;
+
 #ifndef NDEBUG
   // Remember the greatest possible stall as an upper bound on the number of
   // times we should retry the pending queue because of a hazard.
@@ -749,7 +753,12 @@
   /// cycle.
   unsigned getLatencyStallCycles(SUnit *SU);
 
-  unsigned getNextResourceCycle(unsigned PIdx, unsigned Cycles);
+  unsigned getNextResourceCycleByInstance(unsigned InstanceIndex,
+                                          unsigned Cycles);
+
+  std::pair<unsigned, unsigned>
+  getNextResourceCycle(unsigned PIdx, unsigned InstanceCount,
+                       unsigned Cycles);
 
   bool checkHazard(SUnit *SU);
 
Index: include/llvm/CodeGen/TargetSchedule.h
===================================================================
--- include/llvm/CodeGen/TargetSchedule.h
+++ include/llvm/CodeGen/TargetSchedule.h
@@ -116,6 +116,11 @@
     return SchedModel.getProcResource(PIdx);
   }
 
+  /// Get the processor resource table.
+  const MCProcResourceDesc *getProcResourceTable() const {
+    return SchedModel.ProcResourceTable;
+  }
+
 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
   const char *getResourceName(unsigned PIdx) const {
     if (!PIdx)
Index: lib/CodeGen/MachineScheduler.cpp
===================================================================
--- lib/CodeGen/MachineScheduler.cpp
+++ lib/CodeGen/MachineScheduler.cpp
@@ -1846,6 +1846,7 @@
   ZoneCritResIdx = 0;
   IsResourceLimited = false;
   ReservedCycles.clear();
+  ProcessorResourceList.clear();
 #ifndef NDEBUG
   // Track the maximum number of stall cycles that could arise either from the
   // latency of a DAG edge or the number of cycles that a processor resource is
@@ -1884,8 +1885,18 @@
   SchedModel = smodel;
   Rem = rem;
   if (SchedModel->hasInstrSchedModel()) {
-    ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds());
-    ReservedCycles.resize(SchedModel->getNumProcResourceKinds(), InvalidCycle);
+    const MCProcResourceDesc *Resources = SchedModel->getProcResourceTable();
+    unsigned ResourceCount = SchedModel->getNumProcResourceKinds();
+    ProcessorResourceList.resize(ResourceCount);
+    ExecutedResCounts.resize(ResourceCount);
+    unsigned NumUnits = 0;
+
+    for (unsigned i = 0; i < ResourceCount; ++i) {
+      ProcessorResourceList[i] = NumUnits;
+      NumUnits += (Resources + i)->NumUnits;
+    }
+
+    ReservedCycles.resize(NumUnits, InvalidCycle);
   }
 }
 
@@ -1906,11 +1917,12 @@
   return 0;
 }
 
-/// Compute the next cycle at which the given processor resource can be
-/// scheduled.
+
+/// Compute the next cycle at which the given processor resource unit
+/// can be scheduled.
 unsigned SchedBoundary::
-getNextResourceCycle(unsigned PIdx, unsigned Cycles) {
-  unsigned NextUnreserved = ReservedCycles[PIdx];
+getNextResourceCycleByInstance(unsigned InstanceIndex, unsigned Cycles) {
+  unsigned NextUnreserved = ReservedCycles[InstanceIndex];
   // If this resource has never been used, always return cycle zero.
   if (NextUnreserved == InvalidCycle)
     return 0;
@@ -1920,6 +1932,29 @@
   return NextUnreserved;
 }
 
+/// Compute the next cycle at which the given processor resource can be
+/// scheduled.  Returns (NextCycle, InstanceIDAssignedTo).
+std::pair<unsigned, unsigned> SchedBoundary::
+getNextResourceCycle(unsigned PIdx, unsigned ResCount, unsigned Cycles) {
+  unsigned MinNextUnreserved = InvalidCycle;
+  unsigned InstanceID = 0;
+  unsigned StartIndex = ProcessorResourceList[PIdx];
+  unsigned NumberOfInstances = SchedModel->getProcResource(PIdx)->NumUnits;
+  assert(NumberOfInstances > 0 &&
+         "Cannot have zero instances of a ProcResource");
+
+  for (unsigned I = StartIndex, End = StartIndex + NumberOfInstances; I < End;
+       ++I) {
+    unsigned NextUnreserved =
+      getNextResourceCycleByInstance(I, Cycles);
+    if (MinNextUnreserved > NextUnreserved) {
+      InstanceID = I;
+      MinNextUnreserved = NextUnreserved;
+    }
+  }
+  return std::make_pair(MinNextUnreserved, InstanceID);
+}
+
 /// Does this SU have a hazard within the current instruction group.
 ///
 /// The scheduler supports two modes of hazard recognition. The first is the
@@ -1960,8 +1995,11 @@
           make_range(SchedModel->getWriteProcResBegin(SC),
                      SchedModel->getWriteProcResEnd(SC))) {
       unsigned ResIdx = PE.ProcResourceIdx;
+      unsigned ResCount = SchedModel->getProcResource(ResIdx)->NumUnits;
       unsigned Cycles = PE.Cycles;
-      unsigned NRCycle = getNextResourceCycle(ResIdx, Cycles);
+      unsigned NRCycle, InstanceID;
+      std::tie(NRCycle, InstanceID) =
+          getNextResourceCycle(ResIdx, ResCount, Cycles);
       if (NRCycle > CurrCycle) {
 #ifndef NDEBUG
         MaxObservedStall = std::max(Cycles, MaxObservedStall);
@@ -2123,10 +2161,14 @@
                  << "c\n");
   }
   // For reserved resources, record the highest cycle using the resource.
-  unsigned NextAvailable = getNextResourceCycle(PIdx, Cycles);
+  unsigned ResCount = SchedModel->getProcResource(PIdx)->NumUnits;
+  unsigned NextAvailable, InstanceID;
+  std::tie(NextAvailable, InstanceID) =
+      getNextResourceCycle(PIdx, ResCount, Cycles);
   if (NextAvailable > CurrCycle) {
     LLVM_DEBUG(dbgs() << " Resource conflict: "
                       << SchedModel->getProcResource(PIdx)->Name
+                      << " instance " << InstanceID
                       << " reserved until @" << NextAvailable << "\n");
   }
   return NextAvailable;
@@ -2214,12 +2256,16 @@
              PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) {
         unsigned PIdx = PI->ProcResourceIdx;
         if (SchedModel->getProcResource(PIdx)->BufferSize == 0) {
+          unsigned ResCount = SchedModel->getProcResource(PIdx)->NumUnits;
+          unsigned ReservedUntil, InstanceIndex;
+          std::tie(ReservedUntil, InstanceIndex) =
+              getNextResourceCycle(PIdx, ResCount, 0);
           if (isTop()) {
-            ReservedCycles[PIdx] =
-              std::max(getNextResourceCycle(PIdx, 0), NextCycle + PI->Cycles);
+            ReservedCycles[InstanceIndex] =
+                std::max(ReservedUntil, NextCycle + PI->Cycles);
           }
           else
-            ReservedCycles[PIdx] = NextCycle;
+            ReservedCycles[InstanceIndex] = NextCycle;
         }
       }
     }
Index: test/CodeGen/ARM/proc_resouces_schedule.ll
===================================================================
--- /dev/null
+++ test/CodeGen/ARM/proc_resouces_schedule.ll
@@ -0,0 +1,73 @@
+; RUN: llc -mtriple=arm-linux-none-eabi -O3 -mcpu=cortex-r52 --debug 2>&1 < %s | FileCheck %s
+; REQUIRES: asserts
+
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+; CHECK: *** Max MOps {{[0-9]}} at cycle {{[0-9]+}}
+
+define dso_local void @test(i32* %res, i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %x, i32 %y, i32 %z) #0 {
+entry:
+  %res.addr = alloca i32*, align 4
+  %a.addr = alloca i32, align 4
+  %b.addr = alloca i32, align 4
+  %c.addr = alloca i32, align 4
+  %d.addr = alloca i32, align 4
+  %e.addr = alloca i32, align 4
+  %x.addr = alloca i32, align 4
+  %y.addr = alloca i32, align 4
+  %z.addr = alloca i32, align 4
+  store i32* %res, i32** %res.addr, align 4
+  store i32 %a, i32* %a.addr, align 4
+  store i32 %b, i32* %b.addr, align 4
+  store i32 %c, i32* %c.addr, align 4
+  store i32 %d, i32* %d.addr, align 4
+  store i32 %e, i32* %e.addr, align 4
+  store i32 %x, i32* %x.addr, align 4
+  store i32 %y, i32* %y.addr, align 4
+  store i32 %z, i32* %z.addr, align 4
+  %0 = load i32, i32* %a.addr, align 4
+  %1 = load i32, i32* %b.addr, align 4
+  %add = add nsw i32 %0, %1
+  %2 = load i32, i32* %c.addr, align 4
+  %add1 = add nsw i32 %add, %2
+  %3 = load i32, i32* %x.addr, align 4
+  %add2 = add nsw i32 %add1, %3
+  %4 = load i32, i32* %y.addr, align 4
+  %add3 = add nsw i32 %add2, %4
+  %5 = load i32, i32* %z.addr, align 4
+  %add4 = add nsw i32 %add3, %5
+  %6 = load i32*, i32** %res.addr, align 4
+  %arrayidx = getelementptr inbounds i32, i32* %6, i32 1
+  store i32 %add4, i32* %arrayidx, align 4
+  %7 = load i32, i32* %c.addr, align 4
+  %8 = load i32, i32* %d.addr, align 4
+  %add5 = add nsw i32 %7, %8
+  %9 = load i32, i32* %e.addr, align 4
+  %add6 = add nsw i32 %add5, %9
+  %10 = load i32, i32* %x.addr, align 4
+  %add7 = add nsw i32 %add6, %10
+  %11 = load i32, i32* %y.addr, align 4
+  %add8 = add nsw i32 %add7, %11
+  %12 = load i32, i32* %z.addr, align 4
+  %add9 = add nsw i32 %add8, %12
+  %13 = load i32*, i32** %res.addr, align 4
+  %arrayidx10 = getelementptr inbounds i32, i32* %13, i32 2
+  store i32 %add9, i32* %arrayidx10, align 4
+  ret void
+}
+
Index: test/CodeGen/ARM/saxpy10-a9.ll
===================================================================
--- test/CodeGen/ARM/saxpy10-a9.ll
+++ test/CodeGen/ARM/saxpy10-a9.ll
@@ -9,11 +9,15 @@
 ; should be nicely pipelined.
 ;
 ; CHECK: saxpy10:
-; CHECK: vldr
-; CHECK: vldr
-; CHECK: vldr
-; CHECK: vldr
-; CHECK: vldr
+; CHECK: vmov
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vmul
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vmul
+; CHECK-NEXT: vldr
+; CHECK-NEXT: vadd
+; CHECK-NEXT: vldr
 ; CHECK-NEXT: vldr
 ; CHECK-NEXT: vmul
 ; CHECK-NEXT: vadd