Index: lib/CodeGen/MachinePipeliner.cpp =================================================================== --- lib/CodeGen/MachinePipeliner.cpp +++ lib/CodeGen/MachinePipeliner.cpp @@ -192,6 +192,8 @@ const InstrItineraryData *InstrItins; const TargetInstrInfo *TII = nullptr; RegisterClassInfo RegClassInfo; + bool disabledByPragma = false; + unsigned II_setByPragma = 0; #ifndef NDEBUG static int NumTries; @@ -209,7 +211,8 @@ static char ID; - MachinePipeliner() : MachineFunctionPass(ID) { + MachinePipeliner() + : MachineFunctionPass(ID), disabledByPragma(false), II_setByPragma(0) { initializeMachinePipelinerPass(*PassRegistry::getPassRegistry()); } @@ -229,6 +232,7 @@ bool canPipelineLoop(MachineLoop &L); bool scheduleLoop(MachineLoop &L); bool swingModuloScheduler(MachineLoop &L); + void setPragmaPipelineOptions(MachineLoop &L); }; /// This class builds the dependence graph for the instructions in a loop, @@ -237,11 +241,14 @@ MachinePipeliner &Pass; /// The minimum initiation interval between iterations for this schedule. unsigned MII = 0; + /// The maximum initiation interval between iterations for this schedule. + unsigned MAX_II = 0; /// Set to true if a valid pipelined schedule is found for the loop. bool Scheduled = false; MachineLoop &Loop; LiveIntervals &LIS; const RegisterClassInfo &RegClassInfo; + unsigned II_setByPragma = 0; /// A toplogical ordering of the SUnits, which is needed for changing /// dependences and iterating over the SUnits. 
@@ -319,9 +326,9 @@ public: SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis, - const RegisterClassInfo &rci) + const RegisterClassInfo &rci, unsigned II) : ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis), - RegClassInfo(rci), Topo(SUnits, &ExitSU) { + RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) { P.MF->getSubtarget().getSMSMutations(Mutations); if (SwpEnableCopyToPhi) Mutations.push_back(llvm::make_unique()); @@ -382,9 +389,6 @@ return 0; } - /// Set the Minimum Initiation Interval for this schedule attempt. - void setMII(unsigned mii) { MII = mii; } - void applyInstrChange(MachineInstr *MI, SMSchedule &Schedule); void fixupRegisterOverlaps(std::deque &Instrs); @@ -475,6 +479,10 @@ unsigned &OffsetPos, unsigned &NewBase, int64_t &NewOffset); void postprocessDAG(); + /// Set the Minimum Initiation Interval for this schedule attempt. + void setMII(unsigned ResMII, unsigned RecMII); + /// Set the Maximum Initiation Interval for this schedule attempt. + void setMAX_II(); }; /// A NodeSet contains a set of SUnit DAG nodes with additional information @@ -801,8 +809,11 @@ } #endif - if (!canPipelineLoop(L)) + setPragmaPipelineOptions(L); + if (!canPipelineLoop(L)) { + LLVM_DEBUG(dbgs() << "\n!!! 
Can not pipeline loop.\n");
     return Changed;
+  }
 
   ++NumTrytoPipeline;
 
@@ -811,6 +822,61 @@
   return Changed;
 }
 
+/// Read the loop-pipelining hints attached to \p L and cache them in the pass.
+///
+/// Looks up the llvm.loop metadata on the IR terminator of the loop's top block
+/// and records "llvm.loop.pipeline.initiationinterval" in II_setByPragma and
+/// "llvm.loop.pipeline.disable" in disabledByPragma. Both members are cleared
+/// first, so a loop that carries no hints ends up with II_setByPragma == 0 and
+/// disabledByPragma == false.
+void MachinePipeliner::setPragmaPipelineOptions(MachineLoop &L) {
+  // These are pass members that outlive this call; without the reset, hints
+  // picked up for one loop would still apply to every loop handled after it.
+  disabledByPragma = false;
+  II_setByPragma = 0;
+
+  MachineBasicBlock *LBLK = L.getTopBlock();
+  if (LBLK == nullptr)
+    return;
+
+  const BasicBlock *BBLK = LBLK->getBasicBlock();
+  if (BBLK == nullptr)
+    return;
+
+  const Instruction *TI = BBLK->getTerminator();
+  if (TI == nullptr)
+    return;
+
+  MDNode *LoopID = TI->getMetadata(LLVMContext::MD_loop);
+  if (LoopID == nullptr)
+    return;
+
+  assert(LoopID->getNumOperands() > 0 && "requires atleast one operand");
+  assert(LoopID->getOperand(0) == LoopID && "invalid loop");
+
+  // Operand 0 is the self-reference asserted above; hints start at operand 1.
+  for (unsigned i = 1, e = LoopID->getNumOperands(); i < e; ++i) {
+    MDNode *MD = dyn_cast<MDNode>(LoopID->getOperand(i));
+    // Skip operands that are not nodes, and empty nodes that have no name slot.
+    if (MD == nullptr || MD->getNumOperands() == 0)
+      continue;
+
+    MDString *S = dyn_cast<MDString>(MD->getOperand(0));
+    if (S == nullptr)
+      continue;
+
+    if (S->getString() == "llvm.loop.pipeline.initiationinterval") {
+      assert(MD->getNumOperands() == 2 &&
+             "Pipeline initiation interval hint metadata should have two operands.");
+      II_setByPragma =
+          mdconst::extract<ConstantInt>(MD->getOperand(1))->getZExtValue();
+      assert(II_setByPragma >= 1 && "Pipeline initiation interval must be positive.");
+    } else if (S->getString() == "llvm.loop.pipeline.disable") {
+      disabledByPragma = true;
+    }
+  }
+}
+
 /// Return true if the loop can be software pipelined. The algorithm is
 /// restricted to loops with a single basic block. Make sure that the
 /// branch in the loop can be analyzed.
@@ -818,6 +884,10 @@
   if (L.getNumBlocks() != 1)
     return false;
 
+  if (disabledByPragma) {
+    return false;
+  }
+
   // Check if the branch can't be understood because we can't do pipelining
   // if that's the case.
LI.TBB = nullptr;
@@ -876,7 +937,8 @@
 bool MachinePipeliner::swingModuloScheduler(MachineLoop &L) {
   assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
 
-  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo);
+  SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
+                        II_setByPragma);
 
   MachineBasicBlock *MBB = L.getHeader();
   // The kernel should not include any terminator instructions. These
@@ -899,6 +961,28 @@
   return SMS.hasNewSchedule();
 }
 
+/// Choose the minimum initiation interval for this schedule attempt.
+/// An II requested by pragma (II_setByPragma > 0) is used as-is; otherwise the
+/// larger of \p ResMII and \p RecMII is used.
+void SwingSchedulerDAG::setMII(unsigned ResMII, unsigned RecMII) {
+  if (II_setByPragma > 0) {
+    MII = II_setByPragma;
+  } else {
+    MII = std::max(ResMII, RecMII);
+  }
+}
+
+/// Choose the largest initiation interval this schedule attempt may try.
+/// With a pragma-requested II the bound is that II; otherwise it is MII + 10.
+/// Reads MII, so it has to run after setMII().
+void SwingSchedulerDAG::setMAX_II() {
+  if (II_setByPragma > 0) {
+    MAX_II = II_setByPragma;
+  } else {
+    MAX_II = MII + 10;
+  }
+}
+
 /// We override the schedule function in ScheduleDAGInstrs to implement the
 /// scheduling part of the Swing Modulo Scheduling algorithm.
 void SwingSchedulerDAG::schedule() {
@@ -925,9 +1009,11 @@
   if (SwpIgnoreRecMII)
     RecMII = 0;
 
-  MII = std::max(ResMII, RecMII);
-  LLVM_DEBUG(dbgs() << "MII = " << MII << " (rec=" << RecMII
-                    << ", res=" << ResMII << ")\n");
+  setMII(ResMII, RecMII);
+  setMAX_II();
+
+  LLVM_DEBUG(dbgs() << "MII = " << MII << " MAX_II = " << MAX_II
+                    << " (rec=" << RecMII << ", res=" << ResMII << ")\n");
 
   // Can't schedule a loop without a valid MII.
   if (MII == 0)
@@ -2335,8 +2421,9 @@
     return false;
 
   bool scheduleFound = false;
+  unsigned II = 0;
   // Keep increasing II until a valid schedule is found.
-  for (unsigned II = MII; II < MII + 10 && !scheduleFound; ++II) {
+  for (II = MII; II <= MAX_II && !scheduleFound; ++II) {
     Schedule.reset();
     Schedule.setInitiationInterval(II);
     LLVM_DEBUG(dbgs() << "Try to schedule with " << II << "\n");
@@ -2408,7 +2495,8 @@
       scheduleFound = Schedule.isValidSchedule(this);
   }
 
-  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << "\n");
+  LLVM_DEBUG(dbgs() << "Schedule Found? " << scheduleFound << " (II=" << II
+                    << ")\n");
 
   if (scheduleFound)
     Schedule.finalizeSchedule(this);
Index: test/CodeGen/Hexagon/swp-pragma-disable.ii
===================================================================
--- /dev/null
+++ test/CodeGen/Hexagon/swp-pragma-disable.ii
@@ -0,0 +1,49 @@
+; RUN: llc -disable-lsr -march=hexagon -enable-pipeliner \
+; RUN:     -debug-only=pipeliner < %s 2>&1 > /dev/null | FileCheck %s
+; REQUIRES: asserts
+;
+; Test that checks if pipeliner disabled by pragma
+
+; CHECK: Can not pipeline loop
+
+; Function Attrs: nounwind
+define void @f0(i32* nocapture %a0, i32 %a1) #0 {
+b0:
+  %v0 = icmp sgt i32 %a1, 1
+  br i1 %v0, label %b1, label %b4
+
+b1:                                               ; preds = %b0
+  %v1 = load i32, i32* %a0, align 4
+  %v2 = add i32 %v1, 10
+  %v3 = getelementptr i32, i32* %a0, i32 1
+  %v4 = add i32 %a1, -1
+  br label %b2
+
+b2:                                               ; preds = %b2, %b1
+  %v5 = phi i32 [ %v12, %b2 ], [ %v4, %b1 ]
+  %v6 = phi i32* [ %v11, %b2 ], [ %v3, %b1 ]
+  %v7 = phi i32 [ %v10, %b2 ], [ %v2, %b1 ]
+  store i32 %v7, i32* %v6, align 4
+  %v8 = add i32 %v7, 10
+  %v9 = getelementptr i32, i32* %v6, i32 -1
+  store i32 %v8, i32* %v9, align 4
+  %v10 = add i32 %v7, 10
+  %v11 = getelementptr i32, i32* %v6, i32 1
+  %v12 = add i32 %v5, -1
+  %v13 = icmp eq i32 %v12, 0
+  br i1 %v13, label %b3, label %b2, !llvm.loop !2 ; hint sits on the loop latch
+
+b3:                                               ; preds = %b2
+  br label %b4
+
+b4:                                               ; preds = %b3, %b0
+  ret void
+}
+
+attributes #0 = { nounwind }
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!2, !2, i64 0}
+!2 = distinct !{!2, !3}
+!3 = !{!"llvm.loop.pipeline.disable", i1 true}
+
Index: test/CodeGen/Hexagon/swp-pragma-initiation-interval.ii
===================================================================
--- /dev/null
+++ test/CodeGen/Hexagon/swp-pragma-initiation-interval.ii
@@ -0,0 +1,49 @@
+; RUN: llc -disable-lsr -march=hexagon -enable-pipeliner \
+; RUN:     -debug-only=pipeliner < %s 2>&1 > /dev/null | FileCheck %s
+; REQUIRES: asserts
+;
+; Test that checks if the II set by pragma was taken by pipeliner.
+
+; CHECK: MII = 2 MAX_II = 2
+
+; Function Attrs: nounwind
+define void @f0(i32* nocapture %a0, i32 %a1) #0 {
+b0:
+  %v0 = icmp sgt i32 %a1, 1
+  br i1 %v0, label %b1, label %b4
+
+b1:                                               ; preds = %b0
+  %v1 = load i32, i32* %a0, align 4
+  %v2 = add i32 %v1, 10
+  %v3 = getelementptr i32, i32* %a0, i32 1
+  %v4 = add i32 %a1, -1
+  br label %b2
+
+b2:                                               ; preds = %b2, %b1
+  %v5 = phi i32 [ %v12, %b2 ], [ %v4, %b1 ]
+  %v6 = phi i32* [ %v11, %b2 ], [ %v3, %b1 ]
+  %v7 = phi i32 [ %v10, %b2 ], [ %v2, %b1 ]
+  store i32 %v7, i32* %v6, align 4
+  %v8 = add i32 %v7, 10
+  %v9 = getelementptr i32, i32* %v6, i32 -1
+  store i32 %v8, i32* %v9, align 4
+  %v10 = add i32 %v7, 10
+  %v11 = getelementptr i32, i32* %v6, i32 1
+  %v12 = add i32 %v5, -1
+  %v13 = icmp eq i32 %v12, 0
+  br i1 %v13, label %b3, label %b2, !llvm.loop !2 ; hint sits on the loop latch
+
+b3:                                               ; preds = %b2
+  br label %b4
+
+b4:                                               ; preds = %b3, %b0
+  ret void
+}
+
+attributes #0 = { nounwind }
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!2, !2, i64 0}
+!2 = distinct !{!2, !3}
+!3 = !{!"llvm.loop.pipeline.initiationinterval", i32 2}
+
Index: test/CodeGen/Hexagon/swp-resmii-1.ll
===================================================================
--- test/CodeGen/Hexagon/swp-resmii-1.ll
+++ test/CodeGen/Hexagon/swp-resmii-1.ll
@@ -3,7 +3,7 @@
 
 ; Test that checks that we compute the correct ResMII for haar.
 
-; CHECK: MII = 4 (rec=1, res=4)
+; CHECK: MII = 4 MAX_II = 14 (rec=1, res=4)
 
 ; Function Attrs: nounwind
 define void @f0(i16* noalias nocapture readonly %a0, i32 %a1, i32 %a2, i32 %a3, i8* noalias nocapture %a4, i32 %a5) #0 {
Index: test/CodeGen/Hexagon/swp-resmii.ll
===================================================================
--- test/CodeGen/Hexagon/swp-resmii.ll
+++ test/CodeGen/Hexagon/swp-resmii.ll
@@ -4,7 +4,7 @@
 ;
 ; Test that checks if the ResMII is 1.
 
-; CHECK: MII = 1 (rec=1, res=1)
+; CHECK: MII = 1 MAX_II = 11 (rec=1, res=1)
 
 ; Function Attrs: nounwind
 define void @f0(i32* nocapture %a0, i32 %a1) #0 {