Index: lib/CodeGen/MachinePipeliner.cpp
===================================================================
--- lib/CodeGen/MachinePipeliner.cpp
+++ lib/CodeGen/MachinePipeliner.cpp
@@ -119,6 +119,8 @@
 #include <utility>
 #include <vector>
 
+/* #define USE_DFAPacketizer_P 1 */
+
 using namespace llvm;
 
 #define DEBUG_TYPE "pipeliner"
@@ -601,12 +603,18 @@
   /// Virtual register information.
   MachineRegisterInfo &MRI;
 
+#ifdef USE_DFAPacketizer_P
   std::unique_ptr<DFAPacketizer> Resources;
-
+#endif /* USE_DFAPacketizer_P */
 public:
+#ifdef USE_DFAPacketizer_P
   SMSchedule(MachineFunction *mf)
       : ST(mf->getSubtarget()), MRI(mf->getRegInfo()),
         Resources(ST.getInstrInfo()->CreateTargetScheduleState(ST)) {}
+#else
+  SMSchedule(MachineFunction *mf)
+      : ST(mf->getSubtarget()), MRI(mf->getRegInfo()) {}
+#endif /* USE_DFAPacketizer_P */
 
   void reset() {
     ScheduledInstrs.clear();
@@ -1309,6 +1317,7 @@
 
 } // end anonymous namespace
 
+#ifdef USE_DFAPacketizer_P
 /// Calculate the resource constrained minimum initiation interval for the
 /// specified loop. We use the DFA to model the resources needed for
 /// each instruction, and we ignore dependences. A different DFA is created
@@ -1377,6 +1386,19 @@
   Resources.clear();
   return Resmii;
 }
+#else
+unsigned SwingSchedulerDAG::calculateResMII() {
+  // Consider only the issue width.
+  MachineBasicBlock *MBB = Loop.getHeader();
+  unsigned size = 0;
+  for (MachineBasicBlock::iterator I = MBB->getFirstNonPHI(),
+                                   E = MBB->getFirstTerminator();
+       I != E; ++I) {
+    size++;
+  }
+  return (size + SchedModel.getIssueWidth() - 1) / SchedModel.getIssueWidth();
+}
+#endif /* USE_DFAPacketizer_P */
 
 /// Calculate the recurrence-constrainted minimum initiation interval.
 /// Iterate over each circuit. Compute the delay(c) and distance(c)
@@ -3483,6 +3505,7 @@
     M->apply(this);
 }
 
+#ifdef USE_DFAPacketizer_P
 /// Try to schedule the node at the specified StartCycle and continue
 /// until the node is schedule or the EndCycle is reached. This function
 /// returns true if the node is scheduled. This routine may search either
@@ -3536,6 +3559,127 @@
   }
   return false;
 }
+#else
+static void clearResources(std::vector<unsigned> &ResourceTable) {
+  for (unsigned Idx = 0; Idx < ResourceTable.size(); ++Idx) {
+    ResourceTable[Idx] = 0;
+  }
+}
+
+static bool canReserveResources(const TargetSubtargetInfo *STI,
+                                const MCSchedModel &SchedModel,
+                                std::vector<unsigned> &ResourceTable,
+                                SUnit *SU) {
+  unsigned SchedClass = SU->getInstr()->getDesc().getSchedClass();
+  const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(SchedClass);
+  unsigned Issues = 0;
+  for (unsigned Idx = 0; Idx < ResourceTable.size(); ++Idx) {
+    Issues += ResourceTable[Idx];
+  }
+  if (Issues >= SchedModel.IssueWidth)
+    return false;
+
+  // TODO: This check is not accurate. Information about the sub-units of
+  // each ProcResGroup is needed, as proposed in "[TargetSchedule] Expose
+  // sub-units of a ProcResGroup in MCProcResourceDesc"
+  // (https://reviews.llvm.org/D43023).
+  // In addition, resource management using ResourceCycles is necessary.
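+  // For now, treat the instruction as schedulable in this cycle if at least
+  // one processor resource it writes still has a free unit; an instruction
+  // that declares no processor resources only consumes an issue slot and is
+  // counted against entry 0 of the table.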
+  bool Check = false;
+  for (const MCWriteProcResEntry &PRE :
+       make_range(STI->getWriteProcResBegin(SC), STI->getWriteProcResEnd(SC))) {
+    unsigned Idx = PRE.ProcResourceIdx;
+    if (SchedModel.getProcResource(Idx)->NumUnits > ResourceTable[Idx]) {
+      return true;
+    }
+    Check = true;
+  }
+  if (!Check) {
+    ResourceTable[0]++;
+    return true;
+  }
+  return false;
+}
+
+static void reserveResources(const TargetSubtargetInfo *STI,
+                             const MCSchedModel &SchedModel,
+                             std::vector<unsigned> &ResourceTable, SUnit *SU) {
+  unsigned SchedClass = SU->getInstr()->getDesc().getSchedClass();
+  const MCSchedClassDesc *SC = SchedModel.getSchedClassDesc(SchedClass);
+  bool Check = false;
+  for (const MCWriteProcResEntry &PRE :
+       make_range(STI->getWriteProcResBegin(SC), STI->getWriteProcResEnd(SC))) {
+    unsigned Idx = PRE.ProcResourceIdx;
+    if (SchedModel.getProcResource(Idx)->NumUnits > ResourceTable[Idx]) {
+      ResourceTable[Idx]++;
+      return;
+    }
+    Check = true;
+  }
+  if (!Check) {
+    ResourceTable[0]++;
+    return;
+  }
+  llvm_unreachable("reserveResources");
+}
+
+bool SMSchedule::insert(SUnit *SU, int StartCycle, int EndCycle, int II) {
+  const TargetSubtargetInfo *STI = &ST;
+  std::vector<unsigned> ResourceTable;
+  const MCSchedModel &SchedModel = ST.getSchedModel();
+  {
+    unsigned NumRes = SchedModel.getNumProcResourceKinds();
+    for (unsigned Idx = 0; Idx < NumRes; ++Idx) {
+      ResourceTable.push_back(0);
+    }
+  }
+  bool forward = true;
+  if (StartCycle > EndCycle)
+    forward = false;
+
+  // The terminating condition depends on the direction.
+  int termCycle = forward ? EndCycle + 1 : EndCycle - 1;
+  for (int curCycle = StartCycle; curCycle != termCycle;
+       forward ? ++curCycle : --curCycle) {
+
+    // Add the already scheduled instructions at the specified cycle.
+    clearResources(ResourceTable);
+    for (int checkCycle = FirstCycle + ((curCycle - FirstCycle) % II);
+         checkCycle <= LastCycle; checkCycle += II) {
+      std::deque<SUnit *> &cycleInstrs = ScheduledInstrs[checkCycle];
+
+      for (std::deque<SUnit *>::iterator I = cycleInstrs.begin(),
+                                         E = cycleInstrs.end();
+           I != E; ++I) {
+        if (ST.getInstrInfo()->isZeroCost((*I)->getInstr()->getOpcode()))
+          continue;
+        assert(canReserveResources(STI, SchedModel, ResourceTable, (*I)) &&
+               "These instructions have already been scheduled.");
+        reserveResources(STI, SchedModel, ResourceTable, (*I));
+      }
+    }
+    if (ST.getInstrInfo()->isZeroCost(SU->getInstr()->getOpcode()) ||
+        canReserveResources(STI, SchedModel, ResourceTable, SU)) {
+      DEBUG({
+        dbgs() << "\tinsert at cycle " << curCycle << " ";
+        SU->getInstr()->dump();
+      });
+
+      ScheduledInstrs[curCycle].push_back(SU);
+      InstrToCycle.insert(std::make_pair(SU, curCycle));
+      if (curCycle > LastCycle)
+        LastCycle = curCycle;
+      if (curCycle < FirstCycle)
+        FirstCycle = curCycle;
+      return true;
+    }
+    DEBUG({
+      dbgs() << "\tfailed to insert at cycle " << curCycle << " ";
+      SU->getInstr()->dump();
+    });
+  }
+  return false;
+}
+#endif /* USE_DFAPacketizer_P */
 
 // Return the cycle of the earliest scheduled instruction in the chain.
 int SMSchedule::earliestCycleInChain(const SDep &Dep) {
Index: lib/Target/AArch64/AArch64InstrInfo.h
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.h
+++ lib/Target/AArch64/AArch64InstrInfo.h
@@ -293,6 +293,16 @@
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                         const DebugLoc &DL,
                         int *BytesAdded = nullptr) const override;
+
+  bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+                   MachineInstr *&CmpInst) const override;
+
+  unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar,
+                           MachineInstr &Cmp,
+                           SmallVectorImpl<MachineOperand> &Cond,
+                           SmallVectorImpl<MachineInstr *> &PrevInsts,
+                           unsigned Iter, unsigned MaxIter) const override;
+
   bool
   reverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const override;
   bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
Index: lib/Target/AArch64/AArch64InstrInfo.cpp
===================================================================
--- lib/Target/AArch64/AArch64InstrInfo.cpp
+++ lib/Target/AArch64/AArch64InstrInfo.cpp
@@ -5063,3 +5063,58 @@
 
   return It;
 }
+
+bool AArch64InstrInfo::analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+                                   MachineInstr *&CmpInst) const {
+  MachineBasicBlock *LoopEnd = L.getBottomBlock();
+  MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
+  MachineBasicBlock::iterator E = LoopEnd->getFirstNonDebugInstr();
+  MachineInstr *BccMI = nullptr;
+  MachineInstr *CompMI = nullptr;
+  MachineInstr *CopyMI = nullptr;
+  MachineInstr *AddMI = nullptr;
+  for (; I != E; --I) {
+    if (!BccMI && I->getOpcode() == AArch64::Bcc) {
+      BccMI = &*I;
+      AArch64CC::CondCode CC =
+          (AArch64CC::CondCode)BccMI->getOperand(0).getImm();
+      if (CC != AArch64CC::LT)
+        return true;
+    } else if (BccMI && !CompMI && I->getOpcode() == AArch64::SUBSXrr) {
+      CompMI = &*I;
+    } else if (CompMI && !CopyMI && I->getOpcode() == AArch64::COPY) {
+      if (CompMI->getOperand(1).getReg() == I->getOperand(1).getReg()) {
+        CopyMI = &*I;
+      }
+    } else if (CopyMI && !AddMI && I->getOpcode() == AArch64::ADDXri) {
+      if (CompMI->getOperand(1).getReg() == I->getOperand(0).getReg()) {
+        AddMI = &*I;
+      }
+    } else if (AddMI && I->isPHI()) {
+      if (I->getOperand(0).getReg() == AddMI->getOperand(1).getReg() &&
+          I->getOperand(3).getReg() == CopyMI->getOperand(0).getReg()) {
+        IndVarInst = AddMI;
+        CmpInst = CompMI;
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+unsigned
+AArch64InstrInfo::reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar,
+                                  MachineInstr &Cmp,
+                                  SmallVectorImpl<MachineOperand> &Cond,
+                                  SmallVectorImpl<MachineInstr *> &PrevInsts,
+                                  unsigned Iter, unsigned MaxIter) const {
+  MachineInstr *CompMI = nullptr;
+  for (auto I = MBB.instr_rbegin(), E = MBB.instr_rend(); I != E; ++I) {
+    if (I->getOpcode() == AArch64::SUBSXrr) {
+      CompMI = &*I;
+    }
+  }
+  unsigned LoopCount = CompMI->getOperand(1).getReg();
+  Cond.push_back(MachineOperand::CreateImm(AArch64CC::LT));
+  return LoopCount;
+}
Index: lib/Target/AArch64/AArch64TargetMachine.cpp
===================================================================
--- lib/Target/AArch64/AArch64TargetMachine.cpp
+++ lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -488,6 +488,8 @@
     // be register coaleascer friendly.
     addPass(&PeepholeOptimizerID);
   }
+  if (TM->getOptLevel() >= CodeGenOpt::Default)
+    addPass(&MachinePipelinerID);
 }
 
 void AArch64PassConfig::addPostRegAlloc() {
Index: lib/Target/X86/X86InstrInfo.h
===================================================================
--- lib/Target/X86/X86InstrInfo.h
+++ lib/Target/X86/X86InstrInfo.h
@@ -366,6 +366,16 @@
                         MachineBasicBlock *FBB, ArrayRef<MachineOperand> Cond,
                         const DebugLoc &DL,
                         int *BytesAdded = nullptr) const override;
+
+  bool analyzeLoop(MachineLoop &L, MachineInstr *&IndVarInst,
+                   MachineInstr *&CmpInst) const override;
+
+  unsigned reduceLoopCount(MachineBasicBlock &MBB, MachineInstr *IndVar,
+                           MachineInstr &Cmp,
+                           SmallVectorImpl<MachineOperand> &Cond,
+                           SmallVectorImpl<MachineInstr *> &PrevInsts,
+                           unsigned Iter, unsigned MaxIter) const override;
+
   bool canInsertSelect(const MachineBasicBlock &, ArrayRef<MachineOperand> Cond,
                        unsigned, unsigned, int &, int &, int &) const override;
   void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
Index: lib/Target/X86/X86InstrInfo.cpp
===================================================================
--- lib/Target/X86/X86InstrInfo.cpp
+++ lib/Target/X86/X86InstrInfo.cpp
@@ -10872,3 +10872,49 @@
 
   return It;
 }
+
+bool X86InstrInfo::analyzeLoop(MachineLoop &L,
+                               MachineInstr *&IndVarInst,
+                               MachineInstr *&CmpInst) const {
+  MachineBasicBlock *LoopEnd = L.getBottomBlock();
+  MachineBasicBlock::iterator I = LoopEnd->getFirstTerminator();
+  MachineBasicBlock::iterator E = LoopEnd->getFirstNonDebugInstr();
+  MachineInstr *JumpMI = nullptr;
+  MachineInstr *CompMI = nullptr;
+  MachineInstr *AddMI = nullptr;
+  for (; I != E; --I) {
+    if (!JumpMI && I->getOpcode() == X86::JL_1) {
+      JumpMI = &*I;
+    } else if (JumpMI && !CompMI && I->getOpcode() == X86::CMP64rr) {
+      CompMI = &*I;
+    } else if (CompMI && !AddMI &&
+               (I->getOpcode() == X86::INC64r || I->getOpcode() == X86::ADD64ri8)
+               && CompMI->getOperand(0).getReg() == I->getOperand(0).getReg()) {
+      AddMI = &*I;
+    } else if (AddMI && I->isPHI()) {
+      if (I->getOperand(0).getReg() == AddMI->getOperand(1).getReg()
+          && I->getOperand(3).getReg() == AddMI->getOperand(0).getReg()) {
+        IndVarInst = AddMI;
+        CmpInst = CompMI;
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
+unsigned X86InstrInfo::reduceLoopCount(MachineBasicBlock &MBB,
+                                       MachineInstr *IndVar, MachineInstr &Cmp,
+                                       SmallVectorImpl<MachineOperand> &Cond,
+                                       SmallVectorImpl<MachineInstr *> &PrevInsts,
+                                       unsigned Iter, unsigned MaxIter) const {
+  MachineInstr *CompMI = nullptr;
+  for (auto I = MBB.instr_rbegin(), E = MBB.instr_rend(); I != E; ++I) {
+    if (I->getOpcode() == X86::CMP64rr) {
+      CompMI = &*I;
+    }
+  }
+  unsigned LoopCount = CompMI->getOperand(0).getReg();
+  Cond.push_back(MachineOperand::CreateImm(X86::COND_L));
+  return LoopCount;
+}
Index: lib/Target/X86/X86TargetMachine.cpp
===================================================================
--- lib/Target/X86/X86TargetMachine.cpp
+++ lib/Target/X86/X86TargetMachine.cpp
@@ -417,6 +417,9 @@
     addPass(createX86CallFrameOptimization());
   }
 
+  if (TM->getOptLevel() >= CodeGenOpt::Default)
+    addPass(&MachinePipelinerID);
+
   addPass(createX86FlagsCopyLoweringPass());
   addPass(createX86WinAllocaExpander());
 }
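
For reference, a minimal sketch of the kind of source loop the analyzeLoop/reduceLoopCount hooks above pattern-match: a single induction variable stepped by a constant and tested with a signed less-than against the trip count, assuming it lowers to the ADDXri/SUBSXrr/Bcc(LT) sequence on AArch64 and the INC64r-or-ADD64ri8/CMP64rr/JL_1 sequence on x86-64. The function and variable names are illustrative only.

  // Hypothetical example; only the loop structure matters here.
  void axpy(float *X, float *Y, float A, long N) {
    for (long I = 0; I < N; ++I)  // PHI -> add -> signed-lt compare -> branch
      Y[I] = A * X[I] + Y[I];
  }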