Index: include/llvm/CodeGen/SchedulerRegistry.h
===================================================================
--- include/llvm/CodeGen/SchedulerRegistry.h
+++ include/llvm/CodeGen/SchedulerRegistry.h
@@ -63,6 +63,12 @@
 
 /// createBURRListDAGScheduler - This creates a bottom up list scheduler that
 /// schedules nodes in source code order when possible.
+ScheduleDAGSDNodes *createGuidedSrcListDAGScheduler(SelectionDAGISel *IS,
+                                                    CodeGenOpt::Level OptLevel);
+
+
+/// createSourceListDAGScheduler - This creates a bottom up list scheduler that
+/// schedules nodes in source code order when possible.
 ScheduleDAGSDNodes *createSourceListDAGScheduler(SelectionDAGISel *IS,
                                                  CodeGenOpt::Level OptLevel);
 
Index: lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
+++ lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp
@@ -53,6 +53,12 @@
                          createSourceListDAGScheduler);
 
 static RegisterScheduler
+  guidedSrcListDAGScheduler("guided-src",
+                            "Heuristic based on already scheduled nodes, "
+                            "falls back on source order priority.",
+                            createGuidedSrcListDAGScheduler);
+
+static RegisterScheduler
   hybridListDAGScheduler("list-hybrid",
                          "Bottom-up register pressure aware list scheduling "
                          "which tries to balance latency and register pressure",
@@ -1607,6 +1613,22 @@
   bool operator()(SUnit* left, SUnit* right) const;
 };
 
+// gu_src_ls_rr_sort - Priority function for guided source order scheduler.
+struct gu_src_ls_rr_sort : public queue_sort {
+  enum {
+    IsBottomUp = true,
+    HasReadyFilter = false
+  };
+
+  RegReductionPQBase *SPQ;
+  gu_src_ls_rr_sort(RegReductionPQBase *spq)
+    : SPQ(spq) {}
+
+  bool operator()(SUnit* left, SUnit* right) const;
+ private:
+  unsigned getNodeWeight(SDNode *) const;
+};
+
 // src_ls_rr_sort - Priority function for source order scheduler.
 struct src_ls_rr_sort : public queue_sort {
   enum {
@@ -1707,6 +1729,10 @@
     scheduleDAG = scheduleDag;
   }
 
+  ScheduleDAGRRList *getScheduleDAG() {
+    return scheduleDAG;
+  }
+
   ScheduleHazardRecognizer* getHazardRec() {
     return scheduleDAG->getHazardRec();
   }
@@ -1844,6 +1870,9 @@
 typedef RegReductionPriorityQueue<src_ls_rr_sort>
 SrcRegReductionPriorityQueue;
 
+typedef RegReductionPriorityQueue<gu_src_ls_rr_sort>
+GuidedSrcRegReductionPriorityQueue;
+
 typedef RegReductionPriorityQueue<hybrid_ls_rr_sort>
 HybridBURRPriorityQueue;
 
@@ -2562,6 +2591,77 @@
   return BURRSort(left, right, SPQ);
 }
 
+
+unsigned gu_src_ls_rr_sort::getNodeWeight(SDNode *N) const {
+  unsigned Weight = 0;
+  SmallVector<SDNode *, 8> NodeList;
+  using SUVecT = std::vector<SUnit *>;
+  using SUVecItr = SUVecT::iterator;
+  SUVecT &Sequence = SPQ->getScheduleDAG()->Sequence;
+
+  auto IsValidNodeListNode = [](SDNode *N) -> bool {
+    unsigned Opc = N->getOpcode();
+    if (Opc == ISD::UNDEF || Opc == ISD::Constant ||
+        Opc == ISD::TargetConstant || Opc == ISD::ConstantFP ||
+        Opc == ISD::TargetConstantFP || Opc == ISD::Register ||
+        Opc == ISD::RegisterMask)
+      return false;
+    // Check for chain nodes (single result of type MVT::Other).
+    if (N->getNumValues() == 1 && N->getSimpleValueType(0) == MVT::Other)
+      return false;
+    return true;
+  };
+
+  NodeList.push_back(N);
+  for (const SDValue &Op : N->op_values())
+    NodeList.push_back(Op.getNode());
+
+  for (SDNode *Node : NodeList) {
+    if (!IsValidNodeListNode(Node))
+      continue;
+    for (SDNode *User : Node->uses()) {
+      if (User == N)
+        continue;
+      SUVecItr SchedUnit = std::find_if(
+          Sequence.begin(), Sequence.end(), [&](const SUnit *SU) -> bool {
+            if (User->getNodeId() >= 0 &&
+                SU->NodeNum == static_cast<unsigned>(User->getNodeId()))
+              return true;
+            return false;
+          });
+      if (SchedUnit != Sequence.end())
+        Weight += 10;
+    }
+  }
+  return Weight;
+}
+
+// Guided scheduling with src order fall back, otherwise bottom up.
+bool gu_src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
+  if (int res = checkSpecialNodes(left, right))
+    return res > 0;
+
+  SDNode *leftN = left->getNode();
+  SDNode *rightN = right->getNode();
+  unsigned leftNW = getNodeWeight(leftN);
+  unsigned rightNW = getNodeWeight(rightN);
+
+  if (leftNW > rightNW)
+    return false;
+  else if (leftNW < rightNW)
+    return true;
+
+  unsigned LOrder = SPQ->getNodeOrdering(left);
+  unsigned ROrder = SPQ->getNodeOrdering(right);
+
+  // Prefer an ordering where the lower the non-zero order number, the higher
+  // the preference.
+  if ((LOrder || ROrder) && LOrder != ROrder)
+    return LOrder != 0 && (LOrder < ROrder || ROrder == 0);
+
+  return BURRSort(left, right, SPQ);
+}
+
 // Source order, otherwise bottom up.
 bool src_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
   if (int res = checkSpecialNodes(left, right))
     return res > 0;
@@ -3049,6 +3149,20 @@
 }
 
 llvm::ScheduleDAGSDNodes *
+llvm::createGuidedSrcListDAGScheduler(SelectionDAGISel *IS,
+                                      CodeGenOpt::Level OptLevel) {
+  const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
+  const TargetInstrInfo *TII = STI.getInstrInfo();
+  const TargetRegisterInfo *TRI = STI.getRegisterInfo();
+
+  GuidedSrcRegReductionPriorityQueue *PQ =
+      new GuidedSrcRegReductionPriorityQueue(*IS->MF, false, true, TII, TRI, nullptr);
+  ScheduleDAGRRList *SD = new ScheduleDAGRRList(*IS->MF, false, PQ, OptLevel);
+  PQ->setScheduleDAG(SD);
+  return SD;
+}
+
+llvm::ScheduleDAGSDNodes *
 llvm::createSourceListDAGScheduler(SelectionDAGISel *IS,
                                    CodeGenOpt::Level OptLevel) {
   const TargetSubtargetInfo &STI = IS->MF->getSubtarget();
Index: test/CodeGen/X86/guided_sched.ll
===================================================================
--- /dev/null
+++ test/CodeGen/X86/guided_sched.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Both functions must emit same instruction schedule with -guided-src
+;RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=skx | FileCheck --check-prefixes=DEFAULT-SCHED %s
+;RUN: llc < %s -pre-RA-sched=guided-src -mtriple=x86_64-unknown-linux-gnu -mcpu=skx | FileCheck --check-prefixes=GUIDED_SRC-SCHED %s
+
+define i32 @clz_i128(i64, i64) {
+; DEFAULT-SCHED-LABEL: clz_i128:
+; DEFAULT-SCHED:       # BB#0:
+; DEFAULT-SCHED-NEXT:    lzcntq %rsi, %rcx
+; DEFAULT-SCHED-NEXT:    xorl %edx, %edx
+; DEFAULT-SCHED-NEXT:    lzcntq %rdi, %rax
+; DEFAULT-SCHED-NEXT:    cmovael %edx, %ecx
+; DEFAULT-SCHED-NEXT:    addl %ecx, %eax
+; DEFAULT-SCHED-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; DEFAULT-SCHED-NEXT:    retq
+;
+; GUIDED_SRC-SCHED-LABEL: clz_i128:
+; GUIDED_SRC-SCHED:       # BB#0:
+; GUIDED_SRC-SCHED-NEXT:    lzcntq %rsi, %rcx
+; GUIDED_SRC-SCHED-NEXT:    xorl %edx, %edx
+; GUIDED_SRC-SCHED-NEXT:    lzcntq %rdi, %rax
+; GUIDED_SRC-SCHED-NEXT:    cmovael %edx, %ecx
+; GUIDED_SRC-SCHED-NEXT:    addl %ecx, %eax
+; GUIDED_SRC-SCHED-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; GUIDED_SRC-SCHED-NEXT:    retq
+  %3 = tail call i64 @llvm.ctlz.i64(i64 %1, i1 false)
+  %4 = tail call i64 @llvm.ctlz.i64(i64 %0, i1 false)
+  %5 = icmp ne i64 %0, 0
+  %6 = select i1 %5, i64 0, i64 %3
+  %7 = add nuw nsw i64 %6, %4
+  %8 = trunc i64 %7 to i32
+  ret i32 %8
+}
+
+define i32 @clz_i128_swap(i64, i64) {
+; DEFAULT-SCHED-LABEL: clz_i128_swap:
+; DEFAULT-SCHED:       # BB#0:
+; DEFAULT-SCHED-NEXT:    lzcntq %rdi, %rax
+; DEFAULT-SCHED-NEXT:    lzcntq %rsi, %rcx
+; DEFAULT-SCHED-NEXT:    xorl %edx, %edx
+; DEFAULT-SCHED-NEXT:    testq %rdi, %rdi
+; DEFAULT-SCHED-NEXT:    cmovnel %edx, %ecx
+; DEFAULT-SCHED-NEXT:    addl %ecx, %eax
+; DEFAULT-SCHED-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; DEFAULT-SCHED-NEXT:    retq
+;
+; GUIDED_SRC-SCHED-LABEL: clz_i128_swap:
+; GUIDED_SRC-SCHED:       # BB#0:
+; GUIDED_SRC-SCHED-NEXT:    lzcntq %rsi, %rcx
+; GUIDED_SRC-SCHED-NEXT:    xorl %edx, %edx
+; GUIDED_SRC-SCHED-NEXT:    lzcntq %rdi, %rax
+; GUIDED_SRC-SCHED-NEXT:    cmovael %edx, %ecx
+; GUIDED_SRC-SCHED-NEXT:    addl %ecx, %eax
+; GUIDED_SRC-SCHED-NEXT:    # kill: %EAX<def> %EAX<kill> %RAX<kill>
+; GUIDED_SRC-SCHED-NEXT:    retq
+  %3 = tail call i64 @llvm.ctlz.i64(i64 %0, i1 false) ; <-- SWAP
+  %4 = tail call i64 @llvm.ctlz.i64(i64 %1, i1 false) ; <-- SWAP
+  %5 = icmp ne i64 %0, 0
+  %6 = select i1 %5, i64 0, i64 %4
+  %7 = add nuw nsw i64 %6, %3
+  %8 = trunc i64 %7 to i32
+  ret i32 %8
+}
+declare i64 @llvm.ctlz.i64(i64, i1)
+