Index: lib/Target/SystemZ/SystemZHazardRecognizer.h
===================================================================
--- lib/Target/SystemZ/SystemZHazardRecognizer.h
+++ lib/Target/SystemZ/SystemZHazardRecognizer.h
@@ -54,6 +54,10 @@
   /// decoder group.
   unsigned CurrGroupSize;
 
+  /// True if an instruction with four reg operands has been scheduled into
+  /// the current decoder group.
+  bool CurrGroupHas4RegOps;
+
   /// The tracking of resources here are quite similar to the common
   /// code use of a critical resource. However, z13 differs in the way
   /// that it has two processor sides which may be interesting to
@@ -73,6 +77,9 @@
   /// Return true if MI fits into current decoder group.
   bool fitsIntoCurrentGroup(SUnit *SU) const;
 
+  /// Return true if this instruction has four register operands.
+  bool has4RegOps(const MachineInstr *MI) const;
+
   /// Two decoder groups per cycle are formed (for z13), meaning 2x3
   /// instructions. This function returns a number between 0 and 5,
   /// representing the current decoder slot of the current cycle. If an SU
Index: lib/Target/SystemZ/SystemZHazardRecognizer.cpp
===================================================================
--- lib/Target/SystemZ/SystemZHazardRecognizer.cpp
+++ lib/Target/SystemZ/SystemZHazardRecognizer.cpp
@@ -81,6 +81,7 @@
 
 void SystemZHazardRecognizer::Reset() {
   CurrGroupSize = 0;
+  CurrGroupHas4RegOps = false;
   clearProcResCounters();
   GrpCount = 0;
   LastFPdOpCycleIdx = UINT_MAX;
@@ -99,6 +100,12 @@
   if (SC->BeginGroup)
     return (CurrGroupSize == 0);
 
+  // An instruction with 4 register operands will not fit in the last slot.
+  assert ((CurrGroupSize < 2 || !CurrGroupHas4RegOps) &&
+          "Current decoder group is already full!");
+  if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+    return false;
+
   // Since a full group is handled immediately in EmitInstruction(),
   // SU should fit into current group. NumSlots should be 1 or 0,
   // since it is not a cracked or expanded instruction.
@@ -108,6 +115,23 @@
   return true;
 }
 
+bool SystemZHazardRecognizer::has4RegOps(const MachineInstr *MI) const {
+  const MachineFunction &MF = *MI->getParent()->getParent();
+  const TargetRegisterInfo *TRI = &TII->getRegisterInfo();
+  const MCInstrDesc &MID = MI->getDesc();
+  unsigned Count = 0;
+  for (unsigned OpIdx = 0; OpIdx < MID.getNumOperands(); OpIdx++) {
+    const TargetRegisterClass *RC = TII->getRegClass(MID, OpIdx, TRI, MF);
+    if (RC == nullptr) // No register class for this operand: skip it.
+      continue;
+    if (OpIdx >= MID.getNumDefs() &&
+        MID.getOperandConstraint(OpIdx, MCOI::TIED_TO) != -1)
+      continue; // Non-def operand with a TIED_TO constraint: not counted.
+    Count++;
+  }
+  return Count >= 4; // True for four or more counted operands.
+}
+
 void SystemZHazardRecognizer::nextGroup() {
   if (CurrGroupSize == 0)
     return;
@@ -119,6 +143,7 @@
 
   // Reset counter for next group.
   CurrGroupSize = 0;
+  CurrGroupHas4RegOps = false;
 
   // Decrease counters for execution units by one.
   for (unsigned i = 0; i < SchedModel->getNumProcResourceKinds(); ++i)
@@ -172,6 +197,8 @@
     OS << "/EndsGroup";
   if (SU->isUnbuffered)
     OS << "/Unbuffered";
+  if (has4RegOps(SU->getInstr()))
+    OS << "/4RegOps";
 }
 
 void SystemZHazardRecognizer::dumpCurrGroup(std::string Msg) const {
@@ -184,6 +211,7 @@
     dbgs() << "{ " << CurGroupDbg << " }";
     dbgs() << " (" << CurrGroupSize << " decoder slot"
            << (CurrGroupSize > 1 ? "s":"")
+           << (CurrGroupHas4RegOps ? ", 4RegOps" : "")
            << ")\n";
   }
 }
@@ -294,11 +322,14 @@
   // Insert SU into current group by increasing number of slots used
   // in current group.
   CurrGroupSize += getNumDecoderSlots(SU);
-  assert (CurrGroupSize <= 3);
+  CurrGroupHas4RegOps |= has4RegOps(SU->getInstr()); // Cleared in nextGroup().
+  unsigned GroupLim =
+    ((CurrGroupHas4RegOps && getNumDecoderSlots(SU) < 3) ? 2 : 3);
+  assert (CurrGroupSize <= GroupLim && "SU does not fit into decoder group!");
 
   // Check if current group is now full/ended. If so, move on to next
   // group to be ready to evaluate more candidates.
-  if (CurrGroupSize == 3 || SC->EndGroup)
+  if (CurrGroupSize == GroupLim || SC->EndGroup)
     nextGroup();
 }
 
@@ -325,6 +356,10 @@
     return -1;
   }
 
+  // An instruction with 4 register operands will not fit in the last slot.
+  if (CurrGroupSize == 2 && has4RegOps(SU->getInstr()))
+    return 1;
+
   // Most instructions can be placed in any decoder slot.
   return 0;
 }
Index: lib/Target/SystemZ/SystemZMachineScheduler.cpp
===================================================================
--- lib/Target/SystemZ/SystemZMachineScheduler.cpp
+++ lib/Target/SystemZ/SystemZMachineScheduler.cpp
@@ -169,8 +169,7 @@
     return *Available.begin();
   }
 
-  // All nodes that are possible to schedule are stored by in the
-  // Available set.
+  // All nodes that are possible to schedule are stored in the Available set.
   LLVM_DEBUG(dbgs() << "** Available: "; Available.dump(*HazardRec););
 
   Candidate Best;
Index: test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
===================================================================
--- test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
+++ test/CodeGen/SystemZ/vec-cmp-cmp-logic-select.ll
@@ -688,8 +688,8 @@
 ; CHECK-NEXT: vpkg %v6, %v6, %v7
 ; CHECK-NEXT: vpkg %v4, %v4, %v5
 ; CHECK-NEXT: vn %v5, %v16, %v6
-; CHECK-NEXT: vsel %v24, %v3, %v2, %v5
-; CHECK-NEXT: vldeb %v17, %v17
+; CHECK-DAG: vsel %v24, %v3, %v2, %v5
+; CHECK-DAG: vldeb %v17, %v17
 ; CHECK-NEXT: vldeb %v18, %v18
 ; CHECK-NEXT: vfchdb %v17, %v18, %v17
 ; CHECK-NEXT: vmrhf %v18, %v30, %v30