diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h b/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CSEInfo.h @@ -13,10 +13,15 @@ #define LLVM_CODEGEN_GLOBALISEL_CSEINFO_H #include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/iterator_range.h" #include "llvm/CodeGen/CSEConfigBase.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" #include "llvm/CodeGen/GlobalISel/GISelWorkList.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/CodeGen.h" @@ -56,6 +61,77 @@ std::unique_ptr getStandardCSEConfigForOpt(CodeGenOpt::Level Level); +/// A wrapper for SlotIndexes that holds yet-to-be inserted instructions. +/// This is used so we can lazily renumber the indices when the next +/// query is made. The created() observer callback actually executes +/// *before* the MachineInstr is inserted, but SlotIndexes needs to have +/// the instruction fully inserted before it starts renumbering. +class MachineInstNumbering { + SlotIndexes *SI = nullptr; + MachineFunction *MF = nullptr; + bool IsEnabled = true; + /// Contains all instructions that need renumbering. + SmallVector InsertionQueue; + + /// Extract instructions from the queue that have the same parent block, + /// and null them out in the queue. + void + extractInstsFromSameBlock(SmallPtrSetImpl &ExtractedInsts); + /// Find the nearest predecessor instruction that's not queued. + MachineBasicBlock::iterator + findEarlierNonQueuedInst(MachineBasicBlock::iterator Start, + SmallPtrSetImpl &InstQueue); + /// Find the nearest successor instruction that's not queued. + MachineBasicBlock::iterator + findLaterNonQueuedInst(MachineBasicBlock::iterator Start, + SmallPtrSetImpl &InstQueue); + + /// Returns true if the instruction \p A is earlier than the SlotIndex \p BIdx + /// in the block using the instruction numbering. If \p NoFlush is true, then + /// do not flush any queued insertions before performing the query. This can + /// be used for efficiency reasons if both \A and \B are known to be already + /// in the SlotIndexes. + bool isEarlier(const MachineInstr &A, const SlotIndex &BIdx, + bool NoFlush = false); + +public: + MachineInstNumbering() = default; + void init(MachineFunction &MF); + + void setMF(MachineFunction &MF) { this->MF = &MF; } + bool isEnabled() const { return SI != nullptr; } + /// Enable numbering of the given MF, unless the MF has already been analyzed. + void enable(MachineFunction *MF = nullptr); + void disable(); + + /// Flush the queue and start a renumbering for the regions that need it. + void flushAndRenumber(); + /// Remove an instruction from SlotIndexes. + void removeInstruction(MachineInstr &MI); + /// Queue an instruction to be inserted and trigger renumbering + /// on the next query event. + void queueInsertion(MachineInstr &MI); + /// Repair numbering in the given range. + void repairNumberingInRange(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End); + /// Returns true if \p A is earlier than \p B in the block using the + /// instruction numbering. + bool isEarlier(const MachineInstr &A, const MachineInstr &B); + + /// Returns the earliest MachineInstr in the given set \p Instrs. + /// If \p NoFlush is true, then do not flush any queued insertions before + /// performing the query. This can be used for efficiency reasons if both + /// \A and \B are known to be already in the SlotIndexes. + MachineInstr *getEarliestInstr(const SmallPtrSetImpl &Instrs, + bool NoFlush = false); + + /// Deleted copy ctor and assignment operator to prevent copying these. + MachineInstNumbering(const MachineInstNumbering &_) = delete; + MachineInstNumbering &operator=(const MachineInstNumbering &_) = delete; + + ~MachineInstNumbering(); +}; + /// The CSE Analysis object. /// This installs itself as a delegate to the MachineFunction to track /// new instructions as well as deletions. It however will not be able to @@ -74,6 +150,7 @@ FoldingSet CSEMap; MachineRegisterInfo *MRI = nullptr; MachineFunction *MF = nullptr; + MachineInstNumbering MINumbering; std::unique_ptr CSEOpt; /// Keep a cache of UniqueInstrs for each MachineInstr. In GISel, /// often instructions are mutated (while their ID has completely changed). @@ -120,6 +197,9 @@ Error verify(); + MachineInstNumbering &getInstNumbering() { return MINumbering; } + const MachineInstNumbering &getInstNumbering() const { return MINumbering; } + /// Records a newly created inst in a list and lazily insert it to the CSEMap. /// Sometimes, this method might be called with a partially constructed /// MachineInstr, diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h --- a/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/CSEMIRBuilder.h @@ -41,7 +41,7 @@ // a better choice. Does IRTranslator placing constants at the beginning still // make sense? Should this change based on Opcode? bool dominates(MachineBasicBlock::const_iterator A, - MachineBasicBlock::const_iterator B) const; + MachineBasicBlock::const_iterator B); /// For given ID, find a machineinstr in the CSE Map. If found, check if it /// dominates the current insertion point and if not, move it just before the diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h --- a/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Legalizer.h @@ -21,6 +21,7 @@ #define LLVM_CODEGEN_GLOBALISEL_LEGALIZEMACHINEIRPASS_H #include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" namespace llvm { diff --git a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h --- a/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/Localizer.h @@ -29,6 +29,7 @@ // Forward declarations. class MachineRegisterInfo; class TargetTransformInfo; +class CSEInfo; /// This pass implements the localization mechanism described at the /// top of this file. One specificity of the implementation is that @@ -51,6 +52,8 @@ MachineRegisterInfo *MRI; /// TTI used for getting remat costs for instructions. TargetTransformInfo *TTI; + /// Used for accessing the instruction numbering to speed up localizing. + GISelCSEInfo *CSEInfo = nullptr; /// Check if \p MOUse is used in the same basic block as \p Def. /// If the use is in the same block, we say it is local. diff --git a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h --- a/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/CodeGen/TargetOpcodes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" @@ -368,7 +369,8 @@ /// /// \return a MachineInstrBuilder for the newly created instruction. MachineInstrBuilder buildInstr(unsigned Opcode) { - return insertInstr(buildInstrNoInsert(Opcode)); + auto MIB = insertInstr(buildInstrNoInsert(Opcode)); + return MIB; } /// Build but don't insert = \p Opcode . diff --git a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h --- a/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/RegBankSelect.h @@ -504,7 +504,7 @@ std::unique_ptr MORE; /// Helper class used for every code morphing. - MachineIRBuilder MIRBuilder; + std::unique_ptr MIRBuilder; /// Optimization mode of the pass. Mode OptMode; diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp --- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -9,7 +9,12 @@ // //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GlobalISel/CSEInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/InitializePasses.h" #define DEBUG_TYPE "cseinfo" @@ -173,6 +178,291 @@ // Else do nothing. } +void MachineInstNumbering::init(MachineFunction &MF) { + if (!IsEnabled) + return; + this->MF = &MF; + if (SI) { + SI->releaseMemory(); + SI->runOnMachineFunction(MF); + return; + } + SI = new class SlotIndexes(); + SI->runOnMachineFunction(MF); +} + +void MachineInstNumbering::enable(MachineFunction *MF) { + if (IsEnabled && this->MF == MF) + return; + IsEnabled = true; + if (MF) + init(*MF); +} + +void MachineInstNumbering::disable() { + if (SI) { + delete SI; + SI = nullptr; + } + InsertionQueue.clear(); + IsEnabled = false; + return; +} + +void MachineInstNumbering::removeInstruction(MachineInstr &MI) { + assert(IsEnabled && "Expected Inst Numbering to be enabled"); + // Remove the instruction from the queue if it exists. Due to a bug that + // inserts instructions twice due to duplicate observers, we check the entire + // queue instead of quitting on the first hit. + bool Found = false; + for (unsigned I = 0, E = InsertionQueue.size(); I < E; ++I) { + if (InsertionQueue[I] == &MI) { + InsertionQueue[I] = nullptr; + Found = true; + } + } + if (!Found) + SI->removeSingleMachineInstrFromMaps(MI); +} + +void MachineInstNumbering::queueInsertion(MachineInstr &MI) { + assert(IsEnabled && "Expected Inst Numbering to be enabled"); + InsertionQueue.emplace_back(&MI); +} + +void MachineInstNumbering::repairNumberingInRange( + MachineBasicBlock::iterator Begin, MachineBasicBlock::iterator End) { + assert(IsEnabled && "Expected Inst Numbering to be enabled"); + flushAndRenumber(); + SI->repairIndexesInRange(Begin->getParent(), Begin, End); +} + +void MachineInstNumbering::flushAndRenumber() { + assert(IsEnabled && "Expected Inst Numbering to be enabled"); + + if (InsertionQueue.empty()) + return; + + // There may be multiple queued instructions for a given block. Because of + // this we can't trivially find the iterator ranges to do a local index + // repair since the neighbouring instructions may not have valid indexes yet. + // So, we have to find the nearest non-queued predecessor instruction in the + // block that dominates all of the queued instructions. + // + // If the instruction is not queued, then we should have a valid index, and + // therefore can quickly check if any other non-queued instruction is earlier + // or later. + + SmallPtrSet SameBlockSubset; + while (true) { + // First we have to extract all the instructions that share a common basic + // block. + extractInstsFromSameBlock(SameBlockSubset); + if (SameBlockSubset.empty()) { + // We have no more instructions to process. + InsertionQueue.clear(); + return; + } + + // Now walk up from each instruction position until we hit an unqueued + // instruction, and compare the SlotIndex for that to the current "earliest" + // index. + SlotIndex DominatingInstSlotIdx; + MachineBasicBlock::iterator StartIt; + for (auto *MI : SameBlockSubset) { + auto It = MI->getIterator(); + auto TerminalIt = MI->getParent()->begin(); + if (It == TerminalIt) { + // This is the first instruction, end search. + StartIt = It; + break; + } + + auto NonQueuedIt = findEarlierNonQueuedInst(It, SameBlockSubset); + + // There's an edge case where the iterator returned by + // findEarlierNonQueuedInst() is begin(), but is also in the queue. + if (NonQueuedIt == TerminalIt) { + StartIt = NonQueuedIt; + break; + } + + SlotIndex NonQueuedIndex = SI->getInstructionIndex(*NonQueuedIt); + if (!DominatingInstSlotIdx.isValid()) { + // This is the first instruction checked. + DominatingInstSlotIdx = NonQueuedIndex; + StartIt = NonQueuedIt; + } else if (SlotIndex::isEarlierInstr(NonQueuedIndex, + DominatingInstSlotIdx)) { + DominatingInstSlotIdx = NonQueuedIndex; + StartIt = NonQueuedIt; + } + + // If the current iterator is the first instruction, then end the search + // early. + if (StartIt == TerminalIt) + break; + } + + // Now we have the nearest predecessor which we can use as a repair range + // start point. Now do the same thing for the end iterator. + + SlotIndex PostDomSlotIdx; + MachineBasicBlock::iterator EndIt; + for (auto *MI : SameBlockSubset) { + auto It = MI->getIterator(); + auto TerminalIt = MI->getParent()->end(); + if (std::next(It) == TerminalIt) { + EndIt = TerminalIt; + break; + } + + auto NonQueuedIt = findLaterNonQueuedInst(It, SameBlockSubset); + + // There's an edge case where the iterator returned by + // findLaterNonQueuedInst() is end(), but is also in the queue. + if (NonQueuedIt == TerminalIt) { + EndIt = NonQueuedIt; + break; + } + + SlotIndex NonQueuedIndex = SI->getInstructionIndex(*NonQueuedIt); + if (!PostDomSlotIdx.isValid()) { + // This is the first instruction checked. + PostDomSlotIdx = NonQueuedIndex; + EndIt = NonQueuedIt; + } else if (SlotIndex::isEarlierInstr(PostDomSlotIdx, NonQueuedIndex)) { + PostDomSlotIdx = NonQueuedIndex; + EndIt = NonQueuedIt; + } + + // If the current iterator is the first instruction, then end the search + // early. + if (EndIt == TerminalIt) + break; + } + SI->repairIndexesInRange(StartIt->getParent(), StartIt, EndIt); + SameBlockSubset.clear(); + } +} + +MachineBasicBlock::iterator MachineInstNumbering::findEarlierNonQueuedInst( + MachineBasicBlock::iterator Start, + SmallPtrSetImpl &InstQueue) { + MachineBasicBlock *ParentBB = Start->getParent(); + assert(Start != ParentBB->begin() && "Iterator should not be the start"); + MachineBasicBlock::iterator II = Start; + do { + --II; + MachineInstr *CurrInst = &*II; + + // DBG_VALUE instructions aren't inserted into SlotIndexes, so skip them. + if (CurrInst->isDebugInstr()) { + if (II == ParentBB->begin()) + return II; + continue; + } + if (!InstQueue.contains(CurrInst)) { + // We have a non-queued instruction. Return the iterator. + return CurrInst->getIterator(); + } + } while (II != ParentBB->begin()); + return II; +} + +MachineBasicBlock::iterator MachineInstNumbering::findLaterNonQueuedInst( + MachineBasicBlock::iterator Start, + SmallPtrSetImpl &InstQueue) { + MachineBasicBlock *ParentBB = Start->getParent(); + assert(Start != ParentBB->end() && "Iterator should not be the start"); + MachineBasicBlock::iterator II = Start; + do { + ++II; + if (II == ParentBB->end()) + return II; + + MachineInstr *CurrInst = &*II; + + // DBG_VALUE instructions aren't inserted into SlotIndexes, so skip them. + if (CurrInst->isDebugInstr()) { + if (II == ParentBB->end()) + return II; + continue; + } + if (!InstQueue.contains(CurrInst)) { + // We have a non-queued instruction. Return the iterator. + return CurrInst->getIterator(); + } + } while (true); + return II; +} + +void MachineInstNumbering::extractInstsFromSameBlock( + SmallPtrSetImpl &ExtractedInsts) { + MachineBasicBlock *ParentBB = nullptr; + for (unsigned I = 0, E = InsertionQueue.size(); I < E; ++I) { + MachineInstr *MI = InsertionQueue[I]; + if (!MI) + continue; + if (!ParentBB) { + ParentBB = MI->getParent(); + ExtractedInsts.insert(MI); + InsertionQueue[I] = nullptr; + continue; + } + if (MI->getParent() == ParentBB) { + ExtractedInsts.insert(MI); + InsertionQueue[I] = nullptr; + } + } +} + +bool MachineInstNumbering::isEarlier(const MachineInstr &A, + const MachineInstr &B) { + assert(IsEnabled && "Expected Inst Numbering to be enabled"); + // If we have outstanding queued instructions, process them first. + flushAndRenumber(); + + SlotIndex SlotA = SI->getInstructionIndex(A); + SlotIndex SlotB = SI->getInstructionIndex(B); + return SlotIndex::isEarlierInstr(SlotA, SlotB); +} + +bool MachineInstNumbering::isEarlier(const MachineInstr &A, + const SlotIndex &BIdx, bool NoFlush) { + assert(BIdx.isValid() && "Expected valid index"); + // If we have outstanding queued instructions, process them first. + if (!NoFlush) + flushAndRenumber(); + + SlotIndex SlotA = SI->getInstructionIndex(A); + return SlotIndex::isEarlierInstr(SlotA, BIdx); +} + +MachineInstr *MachineInstNumbering::getEarliestInstr( + const SmallPtrSetImpl &Instrs, bool NoFlush) { + MachineInstr *Earliest = nullptr; + SlotIndex EarliestIdx; + for (auto *Other : Instrs) { + if (!Earliest) { + Earliest = Other; + EarliestIdx = SI->getInstructionIndex(*Earliest); + continue; + } + assert(Earliest != Other); + if (isEarlier(*Other, EarliestIdx, NoFlush)) { + Earliest = Other; + EarliestIdx = SI->getInstructionIndex(*Other); + } + } + return Earliest; +} + +MachineInstNumbering::~MachineInstNumbering() { + if (SI) + delete SI; +} + void GISelCSEInfo::recordNewInstruction(MachineInstr *MI) { if (shouldCSE(MI->getOpcode())) { TemporaryInsts.insert(MI); @@ -222,13 +512,25 @@ return CSEOpt->shouldCSEOpc(Opc); } -void GISelCSEInfo::erasingInstr(MachineInstr &MI) { handleRemoveInst(&MI); } -void GISelCSEInfo::createdInstr(MachineInstr &MI) { recordNewInstruction(&MI); } +void GISelCSEInfo::erasingInstr(MachineInstr &MI) { + handleRemoveInst(&MI); + if (MINumbering.isEnabled()) + MINumbering.removeInstruction(MI); +} + +void GISelCSEInfo::createdInstr(MachineInstr &MI) { + recordNewInstruction(&MI); + if (MINumbering.isEnabled()) + MINumbering.queueInsertion(MI); +} void GISelCSEInfo::changingInstr(MachineInstr &MI) { - // For now, perform erase, followed by insert. - erasingInstr(MI); - createdInstr(MI); + // We effectively do an erase + insert, but we don't call erasingInstr() or + // createdInstr() because this we don't need to do anything for the + // instruction numbering for instruction mutations. + handleRemoveInst(&MI); + recordNewInstruction(&MI); } + void GISelCSEInfo::changedInstr(MachineInstr &MI) { changingInstr(MI); } void GISelCSEInfo::analyze(MachineFunction &MF) { @@ -254,6 +556,7 @@ CSEOpt.reset(); MRI = nullptr; MF = nullptr; + MINumbering.disable(); #ifndef NDEBUG OpcodeHitTable.clear(); #endif @@ -412,6 +715,7 @@ if (!AlreadyComputed || Recompute) { Info.releaseMemory(); Info.setCSEConfig(std::move(CSEOpt)); + Info.getInstNumbering().init(*MF); Info.analyze(*MF); AlreadyComputed = true; } diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -12,18 +12,25 @@ // #include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" +#include "llvm/CodeGen/GlobalISel/CSEInfo.h" #include "llvm/CodeGen/GlobalISel/GISelChangeObserver.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/IR/DebugInfoMetadata.h" using namespace llvm; bool CSEMIRBuilder::dominates(MachineBasicBlock::const_iterator A, - MachineBasicBlock::const_iterator B) const { + MachineBasicBlock::const_iterator B) { auto MBBEnd = getMBB().end(); if (B == MBBEnd) return true; assert(A->getParent() == B->getParent() && "Iterators should be in same block"); + + auto &InstNumbering = getCSEInfo()->getInstNumbering(); + if (InstNumbering.isEnabled()) + return InstNumbering.isEarlier(*A, *B); + const MachineBasicBlock *BBA = A->getParent(); MachineBasicBlock::const_iterator I = BBA->begin(); for (; &*I != A && &*I != B; ++I) @@ -48,7 +55,15 @@ // this builder will have the def ready. setInsertPt(*CurMBB, std::next(MII)); } else if (!dominates(MI, CurrPos)) { + ++MII; CurMBB->splice(CurrPos, CurMBB, MI); + auto &InstNumbering = CSEInfo->getInstNumbering(); + if (InstNumbering.isEnabled()) { + InstNumbering.removeInstruction(*MI); + // Update numbering for the modified range. + InstNumbering.repairNumberingInRange(MachineBasicBlock::iterator(MI), + MII); + } } return MachineInstrBuilder(getMF(), MI); } diff --git a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp --- a/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp +++ b/llvm/lib/CodeGen/GlobalISel/IRTranslator.cpp @@ -3047,6 +3047,8 @@ CSEInfo = &Wrapper.get(TPC->getCSEConfig()); EntryBuilder->setCSEInfo(CSEInfo); CurBuilder = std::make_unique(CurMF); + // Disable instruction numbering while building the MF. + CSEInfo->getInstNumbering().disable(); CurBuilder->setCSEInfo(CSEInfo); } else { EntryBuilder = std::make_unique(); @@ -3239,6 +3241,5 @@ // Initialize stack protector information. StackProtector &SP = getAnalysis(); SP.copyToMachineFrameInfo(MF->getFrameInfo()); - return false; } diff --git a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp --- a/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Legalizer.cpp @@ -23,6 +23,7 @@ #include "llvm/CodeGen/GlobalISel/LegalizerHelper.h" #include "llvm/CodeGen/GlobalISel/LostDebugLocObserver.h" #include "llvm/CodeGen/GlobalISel/Utils.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineOptimizationRemarkEmitter.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -330,6 +331,7 @@ MIRBuilder = std::make_unique(); CSEInfo = &Wrapper.get(TPC.getCSEConfig()); MIRBuilder->setCSEInfo(CSEInfo); + CSEInfo->getInstNumbering().enable(&MF); } else MIRBuilder = std::make_unique(); diff --git a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp --- a/llvm/lib/CodeGen/GlobalISel/Localizer.cpp +++ b/llvm/lib/CodeGen/GlobalISel/Localizer.cpp @@ -12,8 +12,10 @@ #include "llvm/CodeGen/GlobalISel/Localizer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/TargetLowering.h" +#include "llvm/CodeGen/TargetPassConfig.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" @@ -21,6 +23,11 @@ using namespace llvm; +static cl::opt UseInstNumbering( + "localizer-use-numbering", cl::Hidden, + cl::desc("Use the instruction numbering in the localizer."), + cl::init(true)); + char Localizer::ID = 0; INITIALIZE_PASS_BEGIN(Localizer, DEBUG_TYPE, "Move/duplicate certain instructions close to their use", @@ -39,11 +46,27 @@ void Localizer::init(MachineFunction &MF) { MRI = &MF.getRegInfo(); TTI = &getAnalysis().getTTI(MF.getFunction()); + + if (!UseInstNumbering) { + CSEInfo = nullptr; + return; + } + // Enable inst numbering, we need to require the CSEInfo analysis since that + // holds the numbering. + auto &TPC = getAnalysis(); + GISelCSEAnalysisWrapper &Wrapper = + getAnalysis().getCSEWrapper(); + auto *FnCSEInfo = &Wrapper.get(TPC.getCSEConfig()); + FnCSEInfo->getInstNumbering().enable(&MF); + CSEInfo = FnCSEInfo; } void Localizer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); + AU.addRequired(); getSelectionDAGFallbackAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -119,7 +142,11 @@ LLVM_DEBUG(dbgs() << "Update use with: " << printReg(NewVRegIt->second) << '\n'); // Update the user reg. + if (CSEInfo) + CSEInfo->changedInstr(*MOUse.getParent()); MOUse.setReg(NewVRegIt->second); + if (CSEInfo) + CSEInfo->changedInstr(*MOUse.getParent()); } } return Changed; @@ -149,15 +176,28 @@ continue; MachineBasicBlock::iterator II(MI); - ++II; - while (II != MBB.end() && !Users.count(&*II)) + if (UseInstNumbering) { + auto &InstNumbering = CSEInfo->getInstNumbering(); + assert(InstNumbering.isEnabled() && "Inst number is not enabled!"); + // We have a set of instructions which we know are users within the same + // block. We want to find the first user of MI. + MachineInstr *Nearest = + InstNumbering.getEarliestInstr(Users, /* NoFlush */ true); + II = Nearest->getIterator(); + } else { ++II; - + while (II != MBB.end() && !Users.count(&*II)) + ++II; + } LLVM_DEBUG(dbgs() << "Intra-block: moving " << *MI << " before " << *&*II << "\n"); assert(II != MBB.end() && "Didn't find the user in the MBB"); MI->removeFromParent(); MBB.insert(II, MI); + + if (UseInstNumbering) + CSEInfo->getInstNumbering().flushAndRenumber(); + Changed = true; } return Changed; @@ -177,10 +217,15 @@ init(MF); + GISelObserverWrapper WrapperObserver(CSEInfo); + RAIIMFObsDelInstaller Installer(MF, WrapperObserver); + // Keep track of the instructions we localized. We'll do a second pass of // intra-block localization to further reduce live ranges. LocalizedSetVecT LocalizedInstrs; + CSEInfo->getInstNumbering().flushAndRenumber(); + bool Changed = localizeInterBlock(MF, LocalizedInstrs); Changed |= localizeIntraBlock(LocalizedInstrs); return Changed; diff --git a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp --- a/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp +++ b/llvm/lib/CodeGen/GlobalISel/RegBankSelect.cpp @@ -13,6 +13,7 @@ #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/GlobalISel/CSEMIRBuilder.h" #include "llvm/CodeGen/GlobalISel/LegalizerInfo.h" #include "llvm/CodeGen/GlobalISel/RegisterBank.h" #include "llvm/CodeGen/GlobalISel/RegisterBankInfo.h" @@ -92,7 +93,15 @@ MBFI = nullptr; MBPI = nullptr; } - MIRBuilder.setMF(MF); + // Enable CSE and inst numbering. + GISelCSEAnalysisWrapper &Wrapper = + getAnalysis().getCSEWrapper(); + auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig()); + CSEInfo->getInstNumbering().enable(&MF); + MIRBuilder = std::make_unique(); + MIRBuilder->setMF(MF); + MIRBuilder->setCSEInfo(CSEInfo); + MORE = std::make_unique(MF, MBFI); } @@ -105,6 +114,8 @@ } AU.addRequired(); getSelectionDAGFallbackAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -160,10 +171,11 @@ // Build the instruction used to repair, then clone it at the right // places. Avoiding buildCopy bypasses the check that Src and Dst have the - // same types because the type is a placeholder when this function is called. - MI = MIRBuilder.buildInstrNoInsert(TargetOpcode::COPY) - .addDef(Dst) - .addUse(Src); + // same types because the type is a placeholder when this function is + // called. + MI = MIRBuilder->buildInstrNoInsert(TargetOpcode::COPY) + .addDef(Dst) + .addUse(Src); LLVM_DEBUG(dbgs() << "Copy: " << printReg(Src) << " to: " << printReg(Dst) << '\n'); } else { @@ -191,8 +203,7 @@ MergeOp = TargetOpcode::G_MERGE_VALUES; auto MergeBuilder = - MIRBuilder.buildInstrNoInsert(MergeOp) - .addDef(MO.getReg()); + MIRBuilder->buildInstrNoInsert(MergeOp).addDef(MO.getReg()); for (Register SrcReg : NewVRegs) MergeBuilder.addUse(SrcReg); @@ -200,7 +211,7 @@ MI = MergeBuilder; } else { MachineInstrBuilder UnMergeBuilder = - MIRBuilder.buildInstrNoInsert(TargetOpcode::G_UNMERGE_VALUES); + MIRBuilder->buildInstrNoInsert(TargetOpcode::G_UNMERGE_VALUES); for (Register DefReg : NewVRegs) UnMergeBuilder.addDef(DefReg); @@ -224,7 +235,7 @@ if (IsFirst) CurMI = MI; else - CurMI = MIRBuilder.getMF().CloneMachineInstr(MI); + CurMI = MIRBuilder->getMF().CloneMachineInstr(MI); InsertPt->insert(*CurMI); NewInstrs[Idx++] = CurMI; IsFirst = false; @@ -662,6 +673,9 @@ OptMode = Mode::Fast; init(MF); + GISelObserverWrapper WrapperObserver(MIRBuilder->getCSEInfo()); + RAIIMFObsDelInstaller Installer(MF, WrapperObserver); + #ifndef NDEBUG // Check that our input is fully legal: we require the function to have the // Legalized property, so it should be. @@ -681,7 +695,7 @@ for (MachineBasicBlock *MBB : RPOT) { // Set a sensible insertion point so that subsequent calls to // MIRBuilder. - MIRBuilder.setMBB(*MBB); + MIRBuilder->setMBB(*MBB); for (MachineBasicBlock::iterator MII = MBB->begin(), End = MBB->end(); MII != End;) { // MI might be invalidated by the assignment, so move the @@ -715,7 +729,7 @@ if (NextInstBB != MBB) { LLVM_DEBUG(dbgs() << "Instruction mapping changed control flow\n"); MBB = NextInstBB; - MIRBuilder.setMBB(*MBB); + MIRBuilder->setMBB(*MBB); End = MBB->end(); } } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerCombiner.cpp @@ -309,6 +309,8 @@ AU.addRequired(); AU.addPreserved(); } + AU.addRequired(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -324,7 +326,14 @@ assert(MF.getProperties().hasProperty( MachineFunctionProperties::Property::Legalized) && "Expected a legalized function?"); - auto *TPC = &getAnalysis(); + auto &TPC = getAnalysis(); + + // Enable CSE and inst numbering. + GISelCSEAnalysisWrapper &Wrapper = + getAnalysis().getCSEWrapper(); + auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig()); + CSEInfo->getInstNumbering().enable(&MF); + const Function &F = MF.getFunction(); bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); @@ -333,8 +342,8 @@ IsOptNone ? nullptr : &getAnalysis(); AArch64PostLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), F.hasMinSize(), KB, MDT); - Combiner C(PCInfo, TPC); - return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); + Combiner C(PCInfo, &TPC); + return C.combineMachineInstrs(MF, CSEInfo); } char AArch64PostLegalizerCombiner::ID = 0; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PostLegalizerLowering.cpp @@ -666,6 +666,8 @@ AU.addRequired(); AU.setPreservesCFG(); getSelectionDAGFallbackAnalysisUsage(AU); + AU.addRequired(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -681,11 +683,18 @@ assert(MF.getProperties().hasProperty( MachineFunctionProperties::Property::Legalized) && "Expected a legalized function?"); - auto *TPC = &getAnalysis(); + auto &TPC = getAnalysis(); + + // Enable CSE and inst numbering. + GISelCSEAnalysisWrapper &Wrapper = + getAnalysis().getCSEWrapper(); + auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig()); + CSEInfo->getInstNumbering().enable(&MF); + const Function &F = MF.getFunction(); AArch64PostLegalizerLoweringInfo PCInfo(F.hasOptSize(), F.hasMinSize()); - Combiner C(PCInfo, TPC); - return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); + Combiner C(PCInfo, &TPC); + return C.combineMachineInstrs(MF, CSEInfo); } char AArch64PostLegalizerLowering::ID = 0; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp --- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp @@ -151,6 +151,8 @@ AU.addRequired(); AU.addPreserved(); } + AU.addRequired(); + AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); } @@ -163,7 +165,14 @@ if (MF.getProperties().hasProperty( MachineFunctionProperties::Property::FailedISel)) return false; - auto *TPC = &getAnalysis(); + auto &TPC = getAnalysis(); + + // Enable CSE and inst numbering. + GISelCSEAnalysisWrapper &Wrapper = + getAnalysis().getCSEWrapper(); + auto *CSEInfo = &Wrapper.get(TPC.getCSEConfig()); + CSEInfo->getInstNumbering().enable(&MF); + const Function &F = MF.getFunction(); bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOpt::None && !skipFunction(F); @@ -172,8 +181,8 @@ IsOptNone ? nullptr : &getAnalysis(); AArch64PreLegalizerCombinerInfo PCInfo(EnableOpt, F.hasOptSize(), F.hasMinSize(), KB, MDT); - Combiner C(PCInfo, TPC); - return C.combineMachineInstrs(MF, /*CSEInfo*/ nullptr); + Combiner C(PCInfo, &TPC); + return C.combineMachineInstrs(MF, CSEInfo); } char AArch64PreLegalizerCombiner::ID = 0; @@ -182,6 +191,7 @@ false, false) INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) +INITIALIZE_PASS_DEPENDENCY(GISelCSEAnalysisWrapperPass) INITIALIZE_PASS_END(AArch64PreLegalizerCombiner, DEBUG_TYPE, "Combine AArch64 machine instrs before legalization", false, false) diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-shift-immed-mismatch-crash.mir @@ -11,16 +11,16 @@ ; CHECK: bb.0: ; CHECK: successors: %bb.1(0x40000000), %bb.2(0x40000000) ; CHECK: liveins: $x0 - ; CHECK: [[DEF:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF + ; CHECK: [[DEF:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF + ; CHECK: [[DEF1:%[0-9]+]]:_(p0) = G_IMPLICIT_DEF ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 16 ; CHECK: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 9 - ; CHECK: [[DEF1:%[0-9]+]]:_(s1) = G_IMPLICIT_DEF - ; CHECK: G_BRCOND [[DEF1]](s1), %bb.2 + ; CHECK: G_BRCOND [[DEF]](s1), %bb.2 ; CHECK: G_BR %bb.1 ; CHECK: bb.1: ; CHECK: successors: ; CHECK: bb.2: - ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF]](p0) :: (load 4 from `i32* undef`, align 8) + ; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[DEF1]](p0) :: (load 4 from `i32* undef`, align 8) ; CHECK: [[MUL:%[0-9]+]]:_(s32) = nsw G_MUL [[C]], [[LOAD]] ; CHECK: [[MUL1:%[0-9]+]]:_(s32) = nsw G_MUL [[MUL]], [[C1]] ; CHECK: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll --- a/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll +++ b/llvm/test/CodeGen/AArch64/GlobalISel/gisel-commandline-option.ll @@ -56,9 +56,9 @@ ; VERIFY-NEXT: Verify generated machine code ; ENABLED-NEXT: Analysis for ComputingKnownBits ; ENABLED-O1-NEXT: MachineDominator Tree Construction +; ENABLED-NEXT: Analysis containing CSE Info ; ENABLED-NEXT: PreLegalizerCombiner ; VERIFY-NEXT: Verify generated machine code -; ENABLED-NEXT: Analysis containing CSE Info ; ENABLED-NEXT: Legalizer ; VERIFY-NEXT: Verify generated machine code ; ENABLED: RegBankSelect diff --git a/llvm/test/CodeGen/AArch64/O0-pipeline.ll b/llvm/test/CodeGen/AArch64/O0-pipeline.ll --- a/llvm/test/CodeGen/AArch64/O0-pipeline.ll +++ b/llvm/test/CodeGen/AArch64/O0-pipeline.ll @@ -34,8 +34,8 @@ ; CHECK-NEXT: Analysis containing CSE Info ; CHECK-NEXT: IRTranslator ; CHECK-NEXT: Analysis for ComputingKnownBits -; CHECK-NEXT: AArch64PreLegalizerCombiner ; CHECK-NEXT: Analysis containing CSE Info +; CHECK-NEXT: AArch64PreLegalizerCombiner ; CHECK-NEXT: Legalizer ; CHECK-NEXT: AArch64PostLegalizerLowering ; CHECK-NEXT: RegBankSelect