Index: llvm/lib/Target/ARM/ARMScheduleA57.td
===================================================================
--- llvm/lib/Target/ARM/ARMScheduleA57.td
+++ llvm/lib/Target/ARM/ARMScheduleA57.td
@@ -177,11 +177,6 @@
 // TODO: according to the doc, conditional uses I0/I1, unconditional uses M
 // Why more complex instruction uses more simple pipeline?
 // May be an error in doc.
-def A57WriteALUsi : SchedWriteVariant<[
-  // lsl #2, lsl #1, or lsr #1.
-  SchedVar<IsPredicatedPred, [A57Write_2cyc_1M]>,
-  SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
-]>;
 def A57WriteALUsr : SchedWriteVariant<[
   SchedVar<IsPredicatedPred, [A57Write_2cyc_1I]>,
   SchedVar<NoSchedPred, [A57Write_2cyc_1M]>
 ]>;
@@ -194,7 +189,7 @@
   SchedVar<IsPredicatedPred, [ReadDefault]>,
   SchedVar<NoSchedPred, [ReadDefault]>
 ]>;
-def : SchedAlias<WriteALUsi, A57WriteALUsi>;
+def : SchedAlias<WriteALUsi, A57Write_2cyc_1M>;
 def : SchedAlias<WriteALUsr, A57WriteALUsr>;
 def : SchedAlias<WriteALUSsr, A57WriteALUSsr>;
 def : SchedAlias<ReadALUsr, A57ReadALUsr>;
Index: llvm/utils/TableGen/CodeGenSchedule.cpp
===================================================================
--- llvm/utils/TableGen/CodeGenSchedule.cpp
+++ llvm/utils/TableGen/CodeGenSchedule.cpp
@@ -1317,10 +1317,6 @@
   SmallVector<unsigned, 4> ProcIndices;
 
   PredTransition() = default;
-  PredTransition(ArrayRef<PredCheck> PT) {
-    PredTerm.assign(PT.begin(), PT.end());
-    ProcIndices.assign(1, 0);
-  }
   PredTransition(ArrayRef<PredCheck> PT, ArrayRef<unsigned> PIds) {
     PredTerm.assign(PT.begin(), PT.end());
     ProcIndices.assign(PIds.begin(), PIds.end());
@@ -1349,14 +1345,25 @@
 private:
   bool mutuallyExclusive(Record *PredDef, ArrayRef<PredCheck> Term);
 
-  void getIntersectingVariants(
-    const CodeGenSchedRW &SchedRW, unsigned TransIdx,
-    std::vector<TransVariant> &IntersectingVariants);
+  void getIntersectingVariants(const CodeGenSchedRW &SchedRW, unsigned TransIdx,
+                               std::vector<TransVariant> &IntersectingVariants,
+                               DenseMap<TransVariant, bool> &VarTracker);
+  void addIntersectingVariant(unsigned TransIdx, TransVariant &Variant,
+                              std::vector<TransVariant> &IntersectingVariants);
   void pushVariant(const TransVariant &VInfo, bool IsRead);
 };
 
 } // end anonymous namespace
 
+template <> struct llvm::DenseMapInfo<TransVariant> {
+  static inline TransVariant getEmptyKey() { return {nullptr, 0, 0, 0}; }
+  static inline TransVariant getTombstoneKey() { return {nullptr, -1U, 0, 0}; }
+  static unsigned getHashValue(const TransVariant &Val) { return Val.RWIdx; }
+  static bool isEqual(const TransVariant &LHS, const TransVariant &RHS) {
+    return LHS.VarOrSeqDef == RHS.VarOrSeqDef && LHS.ProcIdx == RHS.ProcIdx;
+  }
+};
+
 // Return true if this predicate is mutually exclusive with a PredTerm. This
 // degenerates into checking if the predicate is mutually exclusive with any
 // predicate in the Term's conjunction.
@@ -1422,12 +1429,33 @@
   return false;
 }
 
+void PredTransitions::addIntersectingVariant(
+    unsigned TransIdx, TransVariant &Variant,
+    std::vector<TransVariant> &IntersectingVariants) {
+  if (Variant.VarOrSeqDef->isSubClassOf("SchedVar")) {
+    Record *PredDef = Variant.VarOrSeqDef->getValueAsDef("Predicate");
+    if (mutuallyExclusive(PredDef, TransVec[TransIdx].PredTerm))
+      return;
+  }
+  if (IntersectingVariants.empty()) {
+    // The first variant builds on the existing transition.
+    Variant.TransVecIdx = TransIdx;
+    IntersectingVariants.push_back(Variant);
+  } else {
+    // Push another copy of the current transition for more variants.
+    Variant.TransVecIdx = TransVec.size();
+    IntersectingVariants.push_back(Variant);
+    TransVec.push_back(TransVec[TransIdx]);
+  }
+}
+
 // Populate IntersectingVariants with any variants or aliased sequences of the
 // given SchedRW whose processor indices and predicates are not mutually
 // exclusive with the given transition.
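+// Additionally, record in VarTracker whether each candidate variant was
+// processor-compatible with the transition (true) or was skipped because its
+// processor index does not intersect the transition's indices (false).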
 void PredTransitions::getIntersectingVariants(
-  const CodeGenSchedRW &SchedRW, unsigned TransIdx,
-  std::vector<TransVariant> &IntersectingVariants) {
+    const CodeGenSchedRW &SchedRW, unsigned TransIdx,
+    std::vector<TransVariant> &IntersectingVariants,
+    DenseMap<TransVariant, bool> &VarTracker) {
 
   bool GenericRW = false;
 
@@ -1475,8 +1503,10 @@
     if (ProcIndices[0] && Variant.ProcIdx) {
       unsigned Cnt = std::count(ProcIndices.begin(), ProcIndices.end(),
                                 Variant.ProcIdx);
-      if (!Cnt)
+      if (!Cnt) {
+        VarTracker.insert({Variant, false});
         continue;
+      }
       if (Cnt > 1) {
         const CodeGenProcModel &PM =
           *(SchedModels.procModelBegin() + Variant.ProcIdx);
@@ -1486,22 +1516,8 @@
                         " Ensure only one SchedAlias exists per RW.");
       }
     }
-    if (Variant.VarOrSeqDef->isSubClassOf("SchedVar")) {
-      Record *PredDef = Variant.VarOrSeqDef->getValueAsDef("Predicate");
-      if (mutuallyExclusive(PredDef, TransVec[TransIdx].PredTerm))
-        continue;
-    }
-    if (IntersectingVariants.empty()) {
-      // The first variant builds on the existing transition.
-      Variant.TransVecIdx = TransIdx;
-      IntersectingVariants.push_back(Variant);
-    }
-    else {
-      // Push another copy of the current transition for more variants.
-      Variant.TransVecIdx = TransVec.size();
-      IntersectingVariants.push_back(Variant);
-      TransVec.push_back(TransVec[TransIdx]);
-    }
+    VarTracker[Variant] = true;
+    addIntersectingVariant(TransIdx, Variant, IntersectingVariants);
   }
   if (GenericRW && IntersectingVariants.empty()) {
     PrintFatalError(SchedRW.TheDef->getLoc(), "No variant of this type has "
@@ -1583,16 +1599,10 @@
   const SmallVectorImpl<unsigned> &RWSeq, bool IsRead, bool IsForAnyCPU,
   unsigned StartIdx) {
 
-  auto CollectAndAddVariants = [&](unsigned TransIdx,
-                                   const CodeGenSchedRW &SchedRW) {
-    // Distribute this partial PredTransition across intersecting variants.
-    // This will push a copies of TransVec[TransIdx] on the back of TransVec.
-    std::vector<TransVariant> IntersectingVariants;
-    getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants);
+  auto PushVariants = [&](std::vector<TransVariant> &Variants, bool IsRead) {
     // Now expand each variant on top of its copy of the transition.
-    for (const TransVariant &IV : IntersectingVariants)
+    for (const TransVariant &IV : Variants)
       pushVariant(IV, IsRead);
-    return !IntersectingVariants.empty();
   };
 
   // Visit each original RW within the current sequence.
@@ -1602,7 +1612,7 @@
     // Push this RW on all partial PredTransitions or distribute variants.
     // New PredTransitions may be pushed within this loop which should not be
    // revisited (TransEnd must be loop invariant).
-    bool HasAliases = false, WasPushed = false;
+    DenseMap<TransVariant, bool> VTracker;
     for (unsigned TransIdx = StartIdx, TransEnd = TransVec.size();
          TransIdx != TransEnd; ++TransIdx) {
       // In the common case, push RW onto the current operand's sequence.
@@ -1613,23 +1623,34 @@
         TransVec[TransIdx].WriteSequences.back().push_back(*RWI);
         continue;
       }
-      HasAliases = true;
-      WasPushed |= CollectAndAddVariants(TransIdx, SchedRW);
-    }
-    if (IsRead && IsForAnyCPU && HasAliases && !WasPushed) {
+      // Distribute this partial PredTransition across intersecting variants.
+      // This will push copies of TransVec[TransIdx] on the back of TransVec.
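+      // Variants that were skipped because of a processor mismatch stay
+      // mapped to false in VTracker; the IsRead/IsForAnyCPU fall-back below
+      // tries to expand them on a fresh AnyCPU transition.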
+      std::vector<TransVariant> IntersectingVariants;
+      getIntersectingVariants(SchedRW, TransIdx, IntersectingVariants,
+                              VTracker);
+      PushVariants(IntersectingVariants, IsRead);
+    }
+    if (IsRead && IsForAnyCPU) {
       // If we're here this means that in some sched class:
       // a) We have read variant for CPU A
       // b) We have write variant for CPU B
       // b) We don't have write variant for CPU A
       // d) We must expand all read/write variants (IsForAnyCPU is true)
-      // e) We couldn't expand SchedRW because TransVec doesn't have
-      //    any transition with compatible CPU ID.
+      // e) We couldn't expand SchedRW or some of its variants because
+      //    TransVec doesn't have any transition with a compatible CPU ID.
       // In such case we create new empty transition with zero (AnyCPU)
       // index.
       TransVec.reserve(TransVec.size() + 1);
-      TransVec.emplace_back(TransVec[StartIdx].PredTerm);
+      TransVec.emplace_back();
       TransVec.back().ReadSequences.emplace_back();
-      CollectAndAddVariants(TransVec.size() - 1, SchedRW);
+      std::vector<TransVariant> Variants;
+      for (auto &P : VTracker)
+        if (!P.second)
+          addIntersectingVariant(TransVec.size() - 1, P.first, Variants);
+      PushVariants(Variants, IsRead);
+      // Remove the empty transition if we haven't found anything to push.
+      if (Variants.empty())
+        TransVec.pop_back();
     }
   }
 }
@@ -1690,9 +1711,10 @@
              dumpIdxVec(SCTrans.ProcIndices); dbgs() << ")\n");
 }
 // Create a new SchedClass for each variant found by inferFromRW. Pass
-static void inferFromTransitions(ArrayRef<PredTransition> LastTransitions,
-                                 unsigned FromClassIdx,
-                                 CodeGenSchedModels &SchedModels) {
+static void inferFromTransitions(
+    ArrayRef<PredTransition> LastTransitions,
+    const SmallVectorImpl<SmallVector<unsigned, 4>> &InitialWrites,
+    unsigned FromClassIdx, CodeGenSchedModels &SchedModels) {
   // For each PredTransition, create a new CodeGenSchedTransition, which usually
   // requires creating a new SchedClass.
   for (ArrayRef<PredTransition>::iterator
@@ -1711,6 +1733,15 @@
         });
     if (SCTrans.ProcIndices.empty())
       continue;
+
+    // Some sched classes may only have read variants. In such a case we
+    // populate the writes from the initially expanded sequences. We can do
+    // this because none of those writes is variant for any processor in
+    // I->ProcIndices.
+    if (OperWritesVariant.empty())
+      addSequences(SchedModels, InitialWrites, OperWritesVariant, false);
+
+    assert(!OperWritesVariant.empty() && "No writes in variant sched class");
     SCTrans.ToClassIdx =
         SchedModels.addSchedClass(/*ItinClassDef=*/nullptr, OperWritesVariant,
                                   OperReadsVariant, I->ProcIndices);
@@ -1763,6 +1794,8 @@
   }
   LLVM_DEBUG(dbgs() << '\n');
 
+  SmallVector<SmallVector<unsigned, 4>, 16> InitialWrites =
+      LastTransitions[0].WriteSequences;
   // Collect all PredTransitions for individual operands.
   // Iterate until no variant writes remain.
   while (hasVariant(LastTransitions, *this)) {
@@ -1778,7 +1811,7 @@
 
   // WARNING: We are about to mutate the SchedClasses vector. Do not refer to
   // OperWrites, OperReads, or ProcIndices after calling inferFromTransitions.
-  inferFromTransitions(LastTransitions, FromClassIdx, *this);
+  inferFromTransitions(LastTransitions, InitialWrites, FromClassIdx, *this);
 }
 
 // Check if any processor resource group contains all resource records in