diff --git a/llvm/include/llvm/CodeGen/TargetInstrInfo.h b/llvm/include/llvm/CodeGen/TargetInstrInfo.h --- a/llvm/include/llvm/CodeGen/TargetInstrInfo.h +++ b/llvm/include/llvm/CodeGen/TargetInstrInfo.h @@ -25,6 +25,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineOperand.h" #include "llvm/CodeGen/MachineOutliner.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/Support/BranchProbability.h" @@ -1076,9 +1077,23 @@ /// faster sequence. /// \param Root - Instruction that could be combined with one of its operands /// \param Patterns - Vector of possible combination patterns - virtual bool getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl &Patterns) const; + virtual bool + getMachineCombinerPatterns(MachineInstr &Root, + SmallVectorImpl &Patterns, + bool DoRegPressureReduce) const; + + /// Return true if target supports reassociation of instructions in machine + /// combiner pass to reduce register pressure for a given BB. + virtual bool + shouldReduceRegisterPressure(MachineBasicBlock *MBB, + RegisterClassInfo *RegClassInfo) const { + return false; + } + + /// Fix up the placeholder we may add in genAlternativeCodeSequence(). + virtual void + finalizeInsInstrs(MachineInstr &Root, MachineCombinerPattern &P, + SmallVectorImpl &InsInstrs) const {} /// Return true when a code sequence can improve throughput. It /// should be called only for instructions in loops. diff --git a/llvm/lib/CodeGen/MachineCombiner.cpp b/llvm/lib/CodeGen/MachineCombiner.cpp --- a/llvm/lib/CodeGen/MachineCombiner.cpp +++ b/llvm/lib/CodeGen/MachineCombiner.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineSizeOpts.h" #include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/TargetInstrInfo.h" #include "llvm/CodeGen/TargetRegisterInfo.h" #include "llvm/CodeGen/TargetSchedule.h" @@ -72,6 +73,7 @@ MachineTraceMetrics::Ensemble *MinInstr; MachineBlockFrequencyInfo *MBFI; ProfileSummaryInfo *PSI; + RegisterClassInfo RegClassInfo; TargetSchedModel TSchedModel; @@ -103,6 +105,10 @@ SmallVectorImpl &DelInstrs, DenseMap &InstrIdxForVirtReg, MachineCombinerPattern Pattern, bool SlackIsAccurate); + bool reduceRegisterPressure(MachineInstr &Root, MachineBasicBlock *MBB, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + MachineCombinerPattern Pattern); bool preservesResourceLen(MachineBasicBlock *MBB, MachineTraceMetrics::Trace BlockTrace, SmallVectorImpl &InsInstrs, @@ -257,8 +263,9 @@ /// The combiner's goal may differ based on which pattern it is attempting /// to optimize. enum class CombinerObjective { - MustReduceDepth, // The data dependency chain must be improved. - Default // The critical path must not be lengthened. + MustReduceDepth, // The data dependency chain must be improved. + MustReduceRegisterPressure, // The register pressure must be reduced. + Default // The critical path must not be lengthened. }; static CombinerObjective getCombinerObjective(MachineCombinerPattern P) { @@ -300,6 +307,18 @@ return {NewRootLatency, RootLatency}; } +bool MachineCombiner::reduceRegisterPressure( + MachineInstr &Root, MachineBasicBlock *MBB, + SmallVectorImpl &InsInstrs, + SmallVectorImpl &DelInstrs, + MachineCombinerPattern Pattern) { + // FIXME: for now, we don't do any check for the register pressure patterns. + // We treat them as always profitable. But we can do better if we make + // RegPressureTracker class be aware of TIE attribute. Then we can get an + // accurate compare of register pressure with DelInstrs or InsInstrs. + return true; +} + /// The DAGCombine code sequence ends in MI (Machine Instruction) Root. /// The new code sequence ends in MI NewRoot. A necessary condition for the new /// sequence to replace the old sequence is that it cannot lengthen the critical @@ -438,6 +457,8 @@ /// \param DelInstrs instruction to delete from \p MBB /// \param MinInstr is a pointer to the machine trace information /// \param RegUnits set of live registers, needed to compute instruction depths +/// \param TII is target instruction info, used to call target hook +/// \param Pattern is used to call target hook finalizeInsInstrs /// \param IncrementalUpdate if true, compute instruction depths incrementally, /// otherwise invalidate the trace static void insertDeleteInstructions(MachineBasicBlock *MBB, MachineInstr &MI, @@ -445,7 +466,18 @@ SmallVector DelInstrs, MachineTraceMetrics::Ensemble *MinInstr, SparseSet &RegUnits, + const TargetInstrInfo *TII, + MachineCombinerPattern Pattern, bool IncrementalUpdate) { + // If we want to fix up some placeholder for some target, do it now. + // We need this because in genAlternativeCodeSequence, we have not decided the + // better pattern InsInstrs or DelInstrs, so we don't want generate some + // sideeffect to the function. For example we need to delay the constant pool + // entry creation here after InsInstrs is selected as better pattern. + // Otherwise the constant pool entry created for InsInstrs will not be deleted + // even if InsInstrs is not the better pattern. + TII->finalizeInsInstrs(MI, Pattern, InsInstrs); + for (auto *InstrPtr : InsInstrs) MBB->insert((MachineBasicBlock::iterator)&MI, InstrPtr); @@ -522,6 +554,9 @@ bool OptForSize = OptSize || llvm::shouldOptimizeForSize(MBB, PSI, MBFI); + bool DoRegPressureReduce = + TII->shouldReduceRegisterPressure(MBB, &RegClassInfo); + while (BlockIter != MBB->end()) { auto &MI = *BlockIter++; SmallVector Patterns; @@ -552,7 +587,7 @@ // machine-combiner-verify-pattern-order is enabled, all patterns are // checked to ensure later patterns do not provide better latency savings. - if (!TII->getMachineCombinerPatterns(MI, Patterns)) + if (!TII->getMachineCombinerPatterns(MI, Patterns, DoRegPressureReduce)) continue; if (VerifyPatternOrder) @@ -588,12 +623,33 @@ if (ML && TII->isThroughputPattern(P)) SubstituteAlways = true; - if (IncrementalUpdate) { + if (IncrementalUpdate && LastUpdate != BlockIter) { // Update depths since the last incremental update. MinInstr->updateDepths(LastUpdate, BlockIter, RegUnits); LastUpdate = BlockIter; } + if (DoRegPressureReduce && + getCombinerObjective(P) == + CombinerObjective::MustReduceRegisterPressure) { + if (MBB->size() > inc_threshold) { + // Use incremental depth updates for basic blocks above threshold + IncrementalUpdate = true; + LastUpdate = BlockIter; + } + if (reduceRegisterPressure(MI, MBB, InsInstrs, DelInstrs, P)) { + // Replace DelInstrs with InsInstrs. + insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, + RegUnits, TII, P, IncrementalUpdate); + Changed |= true; + + // Go back to previous instruction as it may have ILP reassociation + // opportunity. + BlockIter--; + break; + } + } + // Substitute when we optimize for codesize and the new sequence has // fewer instructions OR // the new sequence neither lengthens the critical path nor increases @@ -601,7 +657,7 @@ if (SubstituteAlways || doSubstitute(NewInstCount, OldInstCount, OptForSize)) { insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, - RegUnits, IncrementalUpdate); + RegUnits, TII, P, IncrementalUpdate); // Eagerly stop after the first pattern fires. Changed = true; break; @@ -624,7 +680,7 @@ } insertDeleteInstructions(MBB, MI, InsInstrs, DelInstrs, MinInstr, - RegUnits, IncrementalUpdate); + RegUnits, TII, P, IncrementalUpdate); // Eagerly stop after the first pattern fires. Changed = true; @@ -660,6 +716,7 @@ nullptr; MinInstr = nullptr; OptSize = MF.getFunction().hasOptSize(); + RegClassInfo.runOnMachineFunction(MF); LLVM_DEBUG(dbgs() << getPassName() << ": " << MF.getName() << '\n'); if (!TII->useMachineCombiner()) { diff --git a/llvm/lib/CodeGen/TargetInstrInfo.cpp b/llvm/lib/CodeGen/TargetInstrInfo.cpp --- a/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -778,8 +778,8 @@ // instruction is known to not increase the critical path, then don't match // that pattern. bool TargetInstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl &Patterns) const { + MachineInstr &Root, SmallVectorImpl &Patterns, + bool DoRegPressureReduce) const { bool Commute; if (isReassociationCandidate(Root, Commute)) { // We found a sequence of instructions that may be suitable for a diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/llvm/lib/Target/AArch64/AArch64InstrInfo.h --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -235,9 +235,10 @@ /// Return true when there is potentially a faster code sequence /// for an instruction chain ending in ``Root``. All potential patterns are /// listed in the ``Patterns`` array. - bool getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl &Patterns) const override; + bool + getMachineCombinerPatterns(MachineInstr &Root, + SmallVectorImpl &Patterns, + bool DoRegPressureReduce) const override; /// Return true when Inst is associative and commutative so that it can be /// reassociated. bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -4508,8 +4508,8 @@ /// pattern evaluator stops checking as soon as it finds a faster sequence. bool AArch64InstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl &Patterns) const { + MachineInstr &Root, SmallVectorImpl &Patterns, + bool DoRegPressureReduce) const { // Integer patterns if (getMaddPatterns(Root, Patterns)) return true; @@ -4517,7 +4517,8 @@ if (getFMAPatterns(Root, Patterns)) return true; - return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, + DoRegPressureReduce); } enum class FMAInstKind { Default, Indexed, Accumulator }; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.h b/llvm/lib/Target/PowerPC/PPCInstrInfo.h --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.h +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.h @@ -348,9 +348,9 @@ /// Return true when there is potentially a faster code sequence /// for an instruction chain ending in . All potential patterns are /// output in the array. - bool getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl &P) const override; + bool getMachineCombinerPatterns(MachineInstr &Root, + SmallVectorImpl &P, + bool DoRegPressureReduce) const override; bool isAssociativeAndCommutative(const MachineInstr &Inst) const override; diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -421,8 +421,8 @@ } bool PPCInstrInfo::getMachineCombinerPatterns( - MachineInstr &Root, - SmallVectorImpl &Patterns) const { + MachineInstr &Root, SmallVectorImpl &Patterns, + bool DoRegPressureReduce) const { // Using the machine combiner in this way is potentially expensive, so // restrict to when aggressive optimizations are desired. if (Subtarget.getTargetMachine().getOptLevel() != CodeGenOpt::Aggressive) @@ -431,7 +431,8 @@ if (getFMAPatterns(Root, Patterns)) return true; - return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns); + return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns, + DoRegPressureReduce); } void PPCInstrInfo::genAlternativeCodeSequence(