Index: llvm/include/llvm/CodeGen/MachinePostRAUpdater.h =================================================================== --- /dev/null +++ llvm/include/llvm/CodeGen/MachinePostRAUpdater.h @@ -0,0 +1,98 @@ +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" + +namespace llvm { + +class MachineBasicBlock; +class MachinePostRAUpdater; +class ReachingDefAnalysis; + +class ModifiedBlock { + friend class MachinePostRAUpdater; + +public: + ModifiedBlock() { } + ModifiedBlock(MachineInstr *MI) { add(MI); } + +private: + std::map> NewDefs; + std::map> NewUses; + SmallPtrSet DeadInsts; + SmallPtrSet NewInsts; + + void remove(MachineInstr *MI); + void add(MachineInstr *MI); + void eraseDead(); + bool hasNewDef(int PhysReg) const { return NewDefs.count(PhysReg); } + bool hasNewUse(int PhysReg) const { return NewUses.count(PhysReg); } + bool isDead(MachineInstr *MI) const { return DeadInsts.count(MI); } + const SmallPtrSetImpl &getNewDefs(int PhysReg) const { + return NewDefs.at(PhysReg); + } + const SmallPtrSetImpl &getNewUses(int PhysReg) const { + return NewUses.at(PhysReg); + } +}; + +class MachinePostRAUpdater { + ReachingDefAnalysis &RDA; + std::map ModifiedBlocks; + + template + bool isSafeToMove(MachineInstr *From, MachineInstr *To); + +public: + MachinePostRAUpdater() = delete; + MachinePostRAUpdater(ReachingDefAnalysis &RDA) : RDA(RDA) { } + /// Return whether From can be moved forwards to just before To. + bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To); + /// Return whether From can be moved backwards to just after To. + bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To); + /// Return whether removing this instruction will have no effect on the + /// program. + bool isSafeToRemove(MachineInstr *MI) const; + /// Return whether removing this instruction will have no effect on the + /// program, ignoring the possible effects on some instructions. + bool isSafeToRemove(MachineInstr *MI, + SmallPtrSetImpl &Ignore) const; + /// Return whether defining the given register at MI would have no effect + /// on the program. + bool isSafeToDefRegAt(MachineInstr *MI, int PhysReg) const; + /// Return whether defining the given register at MI would have no effect + /// on the program, ignoring the possible effects on instructions in Ignore. + bool isSafeToDefRegAt(MachineInstr *MI, int PhysReg, + SmallPtrSetImpl &Ignore) const; + /// Return whether an instruction has been created and inserted before + /// MI which defines the given register. + bool hasNewDefBefore(MachineInstr *MI, int PhysReg) const; + /// Return whether the given register is defined within the block. + bool isDefinedInBlock(MachineBasicBlock *MBB, int PhysReg) const; + /// Return whether the given register is defined within MIs parent block + /// but before MI. + bool hasLocalDefBefore(MachineInstr *MI, int PhysReg) const; + /// Return whether we can prove that A and B use the same value in the + /// given register. + bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, + int PhysReg) const; + /// Return the instruction which defines the register and provides the value + /// that lives out of the block, or nullptr for a non-local or newly added + /// definition. + MachineInstr *getLocalLiveOutDef(MachineBasicBlock *MBB, int PhysReg) const; + /// Return whether the value in PhysReg, used by MI, also lives out of the + /// basic block. + bool isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const; + /// Return the local MachineInstr that is the reaching def for MI, or nullptr + /// for a non-local definition. + MachineInstr *getReachingDef(MachineInstr *MI, int PhysReg); + + void MoveBefore(MachineInstr *From, MachineInstr *To); + void MoveAfter(MachineInstr *From, MachineInstr *To); + void Replace(MachineInstr *From, MachineInstr *To); + void Insert(MachineInstr *MI); + void Remove(MachineInstr *MI); + void Finalize(); +}; + +} // end namespace llvm + Index: llvm/include/llvm/CodeGen/ReachingDefAnalysis.h =================================================================== --- llvm/include/llvm/CodeGen/ReachingDefAnalysis.h +++ llvm/include/llvm/CodeGen/ReachingDefAnalysis.h @@ -93,29 +93,32 @@ /// Provides the instruction id of the closest reaching def instruction of /// PhysReg that reaches MI, relative to the begining of MI's basic block. - int getReachingDef(MachineInstr *MI, int PhysReg); + int getReachingDef(MachineInstr *MI, int PhysReg) const; /// Provides the instruction of the closest reaching def instruction of /// PhysReg that reaches MI, relative to the begining of MI's basic block. - MachineInstr *getReachingMIDef(MachineInstr *MI, int PhysReg); + MachineInstr *getReachingMIDef(MachineInstr *MI, int PhysReg) const; /// Provides the MI, from the given block, corresponding to the Id or a /// nullptr if the id does not refer to the block. - MachineInstr *getInstFromId(MachineBasicBlock *MBB, int InstId); + MachineInstr *getInstFromId(MachineBasicBlock *MBB, int InstId) const; /// Return whether A and B use the same def of PhysReg. bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, int PhysReg); /// Return whether the given register is used after MI, whether it's a local /// use or a live out. - bool isRegUsedAfter(MachineInstr *MI, int PhysReg); + bool isRegUsedAfter(MachineInstr *MI, int PhysReg) const; + + /// Return whether the given register is defined after MI. + bool isRegDefinedAfter(MachineInstr *MI, int PhysReg) const; /// Provides the first instruction before MI that uses PhysReg MachineInstr *getInstWithUseBefore(MachineInstr *MI, int PhysReg); /// Provides all instructions before MI that uses PhysReg void getAllInstWithUseBefore(MachineInstr *MI, int PhysReg, - SmallVectorImpl &Uses); + SmallPtrSetImpl &Uses); /// Provides the clearance - the number of instructions since the closest /// reaching def instuction of PhysReg that reaches MI. @@ -124,11 +127,11 @@ /// Provides the uses, in the same block as MI, of register that MI defines. /// This does not consider live-outs. void getReachingLocalUses(MachineInstr *MI, int PhysReg, - SmallVectorImpl &Uses); + SmallPtrSetImpl &Uses); - /// Provide the number of uses, in the same block as MI, of the register that - /// MI defines. - unsigned getNumUses(MachineInstr *MI, int PhysReg); + bool isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const; + MachineInstr* getLocalLiveOutMIDef(MachineBasicBlock *MBB, + int PhysReg) const; private: /// Set up LiveRegs by merging predecessor live-out values. Index: llvm/lib/CodeGen/CMakeLists.txt =================================================================== --- llvm/lib/CodeGen/CMakeLists.txt +++ llvm/lib/CodeGen/CMakeLists.txt @@ -89,6 +89,7 @@ MachineOutliner.cpp MachinePipeliner.cpp MachinePostDominators.cpp + MachinePostRAUpdater.cpp MachineRegionInfo.cpp MachineRegisterInfo.cpp MachineScheduler.cpp Index: llvm/lib/CodeGen/MachinePostRAUpdater.cpp =================================================================== --- /dev/null +++ llvm/lib/CodeGen/MachinePostRAUpdater.cpp @@ -0,0 +1,317 @@ +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachinePostRAUpdater.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "post-ra-updater" + +using namespace llvm; + +void ModifiedBlock::eraseDead() { + for (auto *MI : DeadInsts) { + LLVM_DEBUG(dbgs() << "PostRA Updater: Erasing " << *MI); + MI->eraseFromParent(); + } +} + +void ModifiedBlock::add(MachineInstr *MI) { + LLVM_DEBUG(dbgs() << "PostRA Updater: Adding " << *MI); + NewInsts.insert(MI); + for (auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (MO.isDef()) + NewDefs[MO.getReg()].insert(MI); + if (MO.isUse()) + NewUses[MO.getReg()].insert(MI); + } +} + +void ModifiedBlock::remove(MachineInstr *MI) { + LLVM_DEBUG(dbgs() << "PostRA Updater: Removing " << *MI); + DeadInsts.insert(MI); + if (NewInsts.count(MI)) + NewInsts.erase(MI); +} + +// Can we safely move 'From' to just before 'To'? To satisfy this, 'From' must +// not define a register that is used by any instructions, after and including, +// 'To'. These instructions also must not redefine any of Froms operands. +template +bool MachinePostRAUpdater::isSafeToMove(MachineInstr *From, MachineInstr *To) { + if (From->getParent() != To->getParent()) + return false; + if (ModifiedBlocks.count(From->getParent())) + return false; + + SmallSet Defs; + // First check that From would compute the same value if moved. + for (auto &MO : From->operands()) { + if (!MO.isReg() || MO.isUndef() || !MO.getReg()) + continue; + if (MO.isDef()) + Defs.insert(MO.getReg()); + else if (!RDA.hasSameReachingDef(From, To, MO.getReg())) + return false; + } + + // Now walk checking that the rest of the instructions will compute the same + // value. + for (auto I = ++Iterator(From), E = Iterator(To); I != E; ++I) { + for (auto &MO : I->operands()) + if (MO.isReg() && MO.getReg() && MO.isUse() && Defs.count(MO.getReg())) + return false; + } + return true; +} + +bool MachinePostRAUpdater::isSafeToMoveForwards(MachineInstr *From, + MachineInstr *To) { + return isSafeToMove(From, To); +} + +bool MachinePostRAUpdater::isSafeToMoveBackwards(MachineInstr *From, + MachineInstr *To) { + return isSafeToMove(From, To); +} + +bool MachinePostRAUpdater::isSafeToRemove(MachineInstr *MI) const { + SmallPtrSet Ignore; + return isSafeToRemove(MI, Ignore); +} + +bool MachinePostRAUpdater::isSafeToRemove(MachineInstr *MI, + SmallPtrSetImpl &Ignore) const { + MachineBasicBlock *MBB = MI->getParent(); + + // Firstly, if this block has already been modified, then check if the MI + // maybe used, or uses, any new instructions. + if (ModifiedBlocks.count(MBB)) { + SmallSet Defs; + SmallSet Uses; + for (auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (MO.isUse()) + Uses.insert(MO.getReg()); + if (MO.isDef()) + Defs.insert(MO.getReg()); + } + + for (int Use : Uses) + if (ModifiedBlocks.at(MBB).hasNewDef(Use)) + return false; + + for (int Def : Defs) + if (ModifiedBlocks.at(MBB).hasNewUse(Def)) + return false; + } + + auto UnsafeUses = [](SmallPtrSetImpl &Uses, + SmallPtrSetImpl &Ignore) { + if (Uses.empty()) + return false; + for (auto MI : Uses) + if (!Ignore.count(MI)) + return true; + return false; + }; + + // Next, check if there's any uses proceeding MI. + SmallVector Defs; + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + + SmallPtrSet Uses; + RDA.getReachingLocalUses(MI, MO.getReg(), Uses); + if (UnsafeUses(Uses, Ignore)) + return false; + + Defs.push_back(MO.getReg()); + } + + // Look for live outs. A special case is handled here where a successor + // maybe the same block, i.e a loop, so we can check for local uses + // preceeding MI. If we have an instruction that is only used by itself, + // then we can remove it. + for (auto Def : Defs) { + if (auto *LiveOut = getLocalLiveOutDef(MI->getParent(), Def)) { + if (LiveOut != MI) + continue; + + for (auto SuccBB : MBB->successors()) { + if (!SuccBB->isLiveIn(Def)) + continue; + + if (SuccBB == MBB) { + SmallPtrSet Uses; + RDA.getAllInstWithUseBefore(MI, Def, Uses); + if (UnsafeUses(Uses, Ignore)) + return false; + } else + return false; + } + } + } + return true; +} + +void MachinePostRAUpdater::MoveAfter(MachineInstr *From, MachineInstr *To) { + assert(From->getParent() == To->getParent() && + "Can't move instructions between blocks"); + MachineBasicBlock *MBB = From->getParent(); + From->removeFromParent(); + MBB->insertAfter(MachineBasicBlock::iterator(To), From); +} + +void MachinePostRAUpdater::MoveBefore(MachineInstr *From, MachineInstr *To) { + assert(From->getParent() == To->getParent() && + "Can't move instructions between blocks"); + MachineBasicBlock *MBB = From->getParent(); + From->removeFromParent(); + MBB->insert(MachineBasicBlock::iterator(To), From); +} + +bool +MachinePostRAUpdater::isSafeToDefRegAt(MachineInstr *MI, int PhysReg) const { + SmallPtrSet Ignore; + return isSafeToDefRegAt(MI, PhysReg, Ignore); +} + +bool +MachinePostRAUpdater::isSafeToDefRegAt(MachineInstr *MI, int PhysReg, + SmallPtrSetImpl &Ignore) const { + MachineBasicBlock *MBB = MI->getParent(); + if (ModifiedBlocks.count(MBB) && ModifiedBlocks.at(MBB).hasNewDef(PhysReg)) + return false; + + // Check for any uses of the register after MI. + if (RDA.isRegUsedAfter(MI, PhysReg)) { + if (auto *Def = RDA.getReachingMIDef(MI, PhysReg)) { + SmallPtrSet Uses; + RDA.getReachingLocalUses(Def, PhysReg, Uses); + for (auto *Use : Uses) + if (!Ignore.count(Use)) + return false; + } else + return false; + } + + // Check for any defs after MI. + if (RDA.isRegDefinedAfter(MI, PhysReg)) { + auto I = MachineBasicBlock::iterator(MI); + for (auto E = MBB->end(); I != E; ++I) { + if (Ignore.count(&*I)) + continue; + for (auto &MO : I->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + return false; + } + } + return true; +} + +bool +MachinePostRAUpdater::hasNewDefBefore(MachineInstr *MI, int PhysReg) const { + auto isBefore = [](MachineInstr *A, MachineInstr *B) { + return std::distance(MachineBasicBlock::iterator(A), + MachineBasicBlock::iterator(B)) > 0; + }; + + MachineBasicBlock *MBB = MI->getParent(); + if (!ModifiedBlocks.count(MBB)) + return false; + if (!ModifiedBlocks.at(MBB).hasNewDef(PhysReg)) + return false; + for (auto *NewDef : ModifiedBlocks.at(MBB).getNewDefs(PhysReg)) + if (isBefore(NewDef, MI)) + return true; + return false; +} + +bool +MachinePostRAUpdater::hasLocalDefBefore(MachineInstr *MI, int PhysReg) const { + if (hasNewDefBefore(MI, PhysReg)) + return true; + return RDA.getReachingDef(MI, PhysReg) >= 0; +} + +bool MachinePostRAUpdater::isDefinedInBlock(MachineBasicBlock *MBB, + int PhysReg) const { + return hasLocalDefBefore(&MBB->back(), PhysReg) || + !isReachingDefLiveOut(&MBB->back(), PhysReg); +} + +void MachinePostRAUpdater::Finalize() { + LLVM_DEBUG(dbgs() << "PostRA Updater: Finalizing changes...\n"); + for (auto &Pair : ModifiedBlocks) + Pair.second.eraseDead(); +} + +void MachinePostRAUpdater::Replace(MachineInstr *Old, MachineInstr *New) { + assert(Old->getParent() == New->getParent() && + "Can't replace instructions in different blocks"); + LLVM_DEBUG(dbgs() << "PostRA Updater:\n- Replacing " << *Old + << "- With " << *New); + Insert(New); + Remove(Old); +} + +void MachinePostRAUpdater::Insert(MachineInstr *MI) { + MachineBasicBlock *MBB = MI->getParent(); + if (ModifiedBlocks.count(MBB)) + ModifiedBlocks.at(MBB).add(MI); + else + ModifiedBlocks.emplace(MBB, ModifiedBlock(MI)); +} + +void MachinePostRAUpdater::Remove(MachineInstr *MI) { + MachineBasicBlock *MBB = MI->getParent(); + if (ModifiedBlocks.count(MBB)) + ModifiedBlocks.at(MBB).remove(MI); + else { + ModifiedBlocks.emplace(MBB, ModifiedBlock()); + ModifiedBlocks.at(MBB).remove(MI); + } +} + +bool +MachinePostRAUpdater::hasSameReachingDef(MachineInstr *A, MachineInstr *B, + int PhysReg) const { + // For now, just bail if we find a new def. + if ((ModifiedBlocks.count(A->getParent()) && + ModifiedBlocks.at(A->getParent()).hasNewDef(PhysReg)) || + (ModifiedBlocks.count(B->getParent()) && + ModifiedBlocks.at(B->getParent()).hasNewDef(PhysReg))) + return false; + return RDA.hasSameReachingDef(A, B, PhysReg); +} + +MachineInstr* MachinePostRAUpdater::getReachingDef(MachineInstr *MI, + int PhysReg) { + MachineBasicBlock *MBB = MI->getParent(); + if (hasNewDefBefore(MI, PhysReg)) + return nullptr; + + MachineInstr *Def = RDA.getReachingMIDef(MI, PhysReg); + if (ModifiedBlocks.count(MBB) && ModifiedBlocks.at(MBB).isDead(Def)) + return nullptr; + return Def; +} + +MachineInstr* MachinePostRAUpdater::getLocalLiveOutDef(MachineBasicBlock *MBB, + int PhysReg) const { + if (ModifiedBlocks.count(MBB) && ModifiedBlocks.at(MBB).hasNewDef(PhysReg)) + return nullptr; + return RDA.getLocalLiveOutMIDef(MBB, PhysReg); +} + +bool MachinePostRAUpdater::isReachingDefLiveOut(MachineInstr *MI, + int PhysReg) const { + MachineBasicBlock *MBB = MI->getParent(); + if (ModifiedBlocks.count(MBB) && + ModifiedBlocks.at(MBB).hasNewDef(PhysReg)) + return false; + return RDA.isReachingDefLiveOut(MI, PhysReg); +} Index: llvm/lib/CodeGen/ReachingDefAnalysis.cpp =================================================================== --- llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -171,9 +171,9 @@ InstIds.clear(); } -int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) { +int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) const { assert(InstIds.count(MI) && "Unexpected machine instuction."); - int InstId = InstIds[MI]; + int InstId = InstIds.lookup(MI); int DefRes = ReachingDefDefaultVal; unsigned MBBNumber = MI->getParent()->getNumber(); assert(MBBNumber < MBBReachingDefs.size() && @@ -190,7 +190,8 @@ return LatestDef; } -MachineInstr* ReachingDefAnalysis::getReachingMIDef(MachineInstr *MI, int PhysReg) { +MachineInstr* ReachingDefAnalysis::getReachingMIDef(MachineInstr *MI, + int PhysReg) const { return getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg)); } @@ -205,7 +206,7 @@ } MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB, - int InstId) { + int InstId) const { assert(static_cast(MBB->getNumber()) < MBBReachingDefs.size() && "Unexpected basic block number."); assert(InstId < static_cast(MBB->size()) && @@ -215,7 +216,7 @@ return nullptr; for (auto &MI : *MBB) { - if (InstIds.count(&MI) && InstIds[&MI] == InstId) + if (InstIds.count(&MI) && InstIds.lookup(&MI) == InstId) return &MI; } return nullptr; @@ -227,7 +228,7 @@ } void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg, - SmallVectorImpl &Uses) { + SmallPtrSetImpl &Uses) { MachineBasicBlock *MBB = Def->getParent(); MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def); while (++MI != MBB->end()) { @@ -240,20 +241,14 @@ if (getReachingMIDef(&*MI, PhysReg) != Def) return; - Uses.push_back(&*MI); + Uses.insert(&*MI); if (MO.isKill()) return; } } } -unsigned ReachingDefAnalysis::getNumUses(MachineInstr *Def, int PhysReg) { - SmallVector Uses; - getReachingLocalUses(Def, PhysReg, Uses); - return Uses.size(); -} - -bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) { +bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) const { MachineBasicBlock *MBB = MI->getParent(); LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); @@ -267,11 +262,23 @@ for (auto Last = MBB->rbegin(), End = MBB->rend(); Last != End; ++Last) { LiveRegs.stepBackward(*Last); if (LiveRegs.contains(PhysReg)) - return InstIds[&*Last] > InstIds[MI]; + return InstIds.lookup(&*Last) > InstIds.lookup(MI); } return false; } +bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI, + int PhysReg) const { + MachineBasicBlock *MBB = MI->getParent(); + if (getReachingDef(MI, PhysReg) != getReachingDef(&MBB->back(), PhysReg)) + return true; + + if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg)) + return Def == getReachingMIDef(MI, PhysReg); + + return false; +} + MachineInstr *ReachingDefAnalysis::getInstWithUseBefore(MachineInstr *MI, int PhysReg) { auto I = MachineBasicBlock::reverse_iterator(MI); @@ -287,12 +294,49 @@ } void ReachingDefAnalysis::getAllInstWithUseBefore(MachineInstr *MI, - int PhysReg, SmallVectorImpl &Uses) { + int PhysReg, SmallPtrSetImpl &Uses) { MachineInstr *Use = nullptr; MachineInstr *Pos = MI; while ((Use = getInstWithUseBefore(Pos, PhysReg))) { - Uses.push_back(Use); + Uses.insert(Use); Pos = Use; } } + +bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, + int PhysReg) const { + MachineBasicBlock *MBB = MI->getParent(); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + if (!LiveRegs.contains(PhysReg)) + return false; + + MachineInstr *Last = &MBB->back(); + int Def = getReachingDef(MI, PhysReg); + if (getReachingDef(Last, PhysReg) != Def) + return false; + + // Finally check that the last instruction doesn't redefine the register. + for (auto &MO : Last->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + return false; + + return true; +} + +MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, + int PhysReg) const { + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + if (!LiveRegs.contains(PhysReg)) + return nullptr; + + MachineInstr *Last = &MBB->back(); + int Def = getReachingDef(Last, PhysReg); + for (auto &MO : Last->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + return Last; + + return Def < 0 ? nullptr : getInstFromId(MBB, Def); +} Index: llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp =================================================================== --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -26,8 +26,8 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineLoopUtils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachinePostRAUpdater.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/MC/MCInstrDesc.h" using namespace llvm; @@ -107,11 +107,12 @@ // Is it safe to define LR with DLS/WLS? // LR can be defined if it is the operand to start, because it's the same // value, or if it's going to be equivalent to the operand to Start. - MachineInstr *IsSafeToDefineLR(ReachingDefAnalysis *RDA); + MachineInstr *isSafeToDefineLR(MachinePostRAUpdater *Updater); // Check the branch targets are within range and we satisfy our // restrictions. - void CheckLegality(ARMBasicBlockUtils *BBUtils, ReachingDefAnalysis *RDA, + void CheckLegality(ARMBasicBlockUtils *BBUtils, + MachinePostRAUpdater *Updater, MachineLoopInfo *MLI); bool FoundAllComponents() const { @@ -148,7 +149,7 @@ class ARMLowOverheadLoops : public MachineFunctionPass { MachineFunction *MF = nullptr; MachineLoopInfo *MLI = nullptr; - ReachingDefAnalysis *RDA = nullptr; + std::unique_ptr Updater; const ARMBaseInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; const TargetRegisterInfo *TRI = nullptr; @@ -185,7 +186,7 @@ void RevertWhile(MachineInstr *MI) const; - bool RevertLoopDec(MachineInstr *MI, bool AllowFlags = false) const; + bool RevertLoopDec(MachineInstr *MI) const; void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const; @@ -205,7 +206,7 @@ INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME, false, false) -MachineInstr *LowOverheadLoop::IsSafeToDefineLR(ReachingDefAnalysis *RDA) { +MachineInstr *LowOverheadLoop::isSafeToDefineLR(MachinePostRAUpdater *Updater) { // We can define LR because LR already contains the same value. if (Start->getOperand(0).getReg() == ARM::LR) return Start; @@ -223,23 +224,26 @@ // Find an insertion point: // - Is there a (mov lr, Count) before Start? If so, and nothing else writes // to Count before Start, we can insert at that mov. + if (auto *LRDef = Updater->getReachingDef(Start, ARM::LR)) + if (IsMoveLR(LRDef) && Updater->hasSameReachingDef(Start, LRDef, CountReg)) + return LRDef; + // - Is there a (mov lr, Count) after Start? If so, and nothing else writes // to Count after Start, we can insert at that mov. - if (auto *LRDef = RDA->getReachingMIDef(&MBB->back(), ARM::LR)) { - if (IsMoveLR(LRDef) && RDA->hasSameReachingDef(Start, LRDef, CountReg)) + if (auto *LRDef = Updater->getLocalLiveOutDef(MBB, ARM::LR)) + if (IsMoveLR(LRDef) && Updater->hasSameReachingDef(Start, LRDef, CountReg)) return LRDef; - } // We've found no suitable LR def and Start doesn't use LR directly. Can we // just define LR anyway? - if (!RDA->isRegUsedAfter(Start, ARM::LR)) + if (Updater->isSafeToDefRegAt(Start, ARM::LR)) return Start; return nullptr; } void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils, - ReachingDefAnalysis *RDA, + MachinePostRAUpdater *Updater, MachineLoopInfo *MLI) { if (Revert) return; @@ -273,7 +277,7 @@ return; } - InsertPt = Revert ? nullptr : IsSafeToDefineLR(RDA); + InsertPt = Revert ? nullptr : isSafeToDefineLR(Updater); if (!InsertPt) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); Revert = true; @@ -293,28 +297,46 @@ // If the register is defined within loop, then we can't perform TP. // TODO: Check whether this is just a mov of a register that would be // available. - if (RDA->getReachingDef(VCTP, NumElements) >= 0) { + if (Updater->hasLocalDefBefore(VCTP, NumElements)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Found in-loop element count def.\n"); CannotTailPredicate = true; return; } - // We can't perform TP if the register does not hold the same value at - // InsertPt as the liveout value. + // The element count register maybe defined after InsertPt, in which case we + // need to try to move either InsertPt or the def so that the [w|d]lstp can + // use the value. MachineBasicBlock *InsertBB = InsertPt->getParent(); - if (!RDA->hasSameReachingDef(InsertPt, &InsertBB->back(), - NumElements)) { - CannotTailPredicate = true; - return; + if (!Updater->isReachingDefLiveOut(InsertPt, NumElements)) { + if (auto *ElemDef = Updater->getLocalLiveOutDef(InsertBB, NumElements)) { + if (Updater->isSafeToMoveForwards(ElemDef, InsertPt)) { + Updater->MoveBefore(ElemDef, InsertPt); + LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: " + << *ElemDef); + } else if (Updater->isSafeToMoveBackwards(InsertPt, ElemDef)) { + Updater->MoveAfter(ElemDef, InsertPt); + LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef); + } else { + LLVM_DEBUG(dbgs() << "ARM Loops: Unable to use element count in:\n" + << *InsertBB); + CannotTailPredicate = true; + return; + } + } } // Especially in the case of while loops, InsertBB may not be the // preheader, so we need to check that the register isn't redefined // before entering the loop. - auto CannotProvideElements = [&RDA](MachineBasicBlock *MBB, - Register NumElements) { - // NumElements is redefined in this block. - if (RDA->getReachingDef(&MBB->back(), NumElements) >= 0) + auto CannotProvideElements = [&Updater](MachineBasicBlock *MBB, + Register NumElements) { + if (Updater->isDefinedInBlock(MBB, NumElements)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Found a def of element count:\n"; + if (auto *Def = Updater->getReachingDef(&MBB->back(), + NumElements)) + dbgs() << " - " << *Def); return true; + } // Don't continue searching up through multiple predecessors. if (MBB->pred_size() > 1) @@ -333,8 +355,11 @@ // Then search backwards for a def, until we get to InsertBB. while (MBB != InsertBB) { CannotTailPredicate = CannotProvideElements(MBB, NumElements); - if (CannotTailPredicate) + if (CannotTailPredicate) { + LLVM_DEBUG(dbgs() << "ARM Loops: Unable to use element count in:\n" + << *MBB); return; + } MBB = *MBB->pred_begin(); } @@ -352,7 +377,8 @@ LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n"); MLI = &getAnalysis(); - RDA = &getAnalysis(); + Updater = std::make_unique( + getAnalysis()); MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness); MRI = &MF->getRegInfo(); TII = static_cast(ST.getInstrInfo()); @@ -366,6 +392,7 @@ if (!ML->getParentLoop()) Changed |= ProcessLoop(ML); } + Updater->Finalize(); Changed |= RevertNonLoops(); return Changed; } @@ -467,7 +494,7 @@ if (!LoLoop.FoundAllComponents()) return false; - LoLoop.CheckLegality(BBUtils.get(), RDA, MLI); + LoLoop.CheckLegality(BBUtils.get(), Updater.get(), MLI); Expand(LoLoop); return true; } @@ -485,6 +512,7 @@ MIB.addImm(0); MIB.addImm(ARMCC::AL); MIB.addReg(ARM::NoRegister); + Updater->Insert(&*MIB); MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? @@ -494,19 +522,21 @@ MIB.add(MI->getOperand(1)); // branch target MIB.addImm(ARMCC::EQ); // condition code MIB.addReg(ARM::CPSR); - MI->eraseFromParent(); + Updater->Insert(&*MIB); + Updater->Remove(MI); } -bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI, - bool SetFlags) const { +bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI); MachineBasicBlock *MBB = MI->getParent(); + MachineInstr *Last = &MBB->back(); + + SmallPtrSet Ignore; + if (Last->getOpcode() == ARM::t2LoopEnd) + Ignore.insert(Last); // If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS. - if (SetFlags && - (RDA->isRegUsedAfter(MI, ARM::CPSR) || - !RDA->hasSameReachingDef(MI, &MBB->back(), ARM::CPSR))) - SetFlags = false; + bool SetFlags = Updater->isSafeToDefRegAt(MI, ARM::CPSR, Ignore); MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); @@ -522,7 +552,8 @@ } else MIB.addReg(0); - MI->eraseFromParent(); + Updater->Insert(&*MIB); + Updater->Remove(MI); return SetFlags; } @@ -539,6 +570,7 @@ MIB.addImm(0); MIB.addImm(ARMCC::AL); MIB.addReg(ARM::NoRegister); + Updater->Insert(&*MIB); } MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); @@ -551,7 +583,8 @@ MIB.add(MI->getOperand(1)); // branch target MIB.addImm(ARMCC::NE); // condition code MIB.addReg(ARM::CPSR); - MI->eraseFromParent(); + Updater->Insert(&*MIB); + Updater->Remove(MI); } MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) { @@ -574,33 +607,34 @@ // calculate the number of loop iterations. if (LoLoop.IsTailPredicationLegal()) { SmallVector Killed; - SmallVector Dead; - if (auto *Def = RDA->getReachingMIDef(Start, - Start->getOperand(0).getReg())) { + SmallPtrSet Dead; + if (auto *Def = Updater->getReachingDef(Start, + Start->getOperand(0).getReg())) { Killed.push_back(Def); while (!Killed.empty()) { MachineInstr *Def = Killed.back(); Killed.pop_back(); - Dead.push_back(Def); + Dead.insert(Def); for (auto &MO : Def->operands()) { if (!MO.isReg() || !MO.isKill()) continue; - MachineInstr *Kill = RDA->getReachingMIDef(Def, MO.getReg()); - if (Kill && RDA->getNumUses(Kill, MO.getReg()) == 1) - Killed.push_back(Kill); + if (MachineInstr *Kill = Updater->getReachingDef(Def, MO.getReg())) + if (Updater->isSafeToRemove(Kill, Dead)) + Killed.push_back(Kill); } } for (auto *MI : Dead) - MI->eraseFromParent(); + Updater->Remove(MI); } } // If we're inserting at a mov lr, then remove it as it's redundant. if (InsertPt != Start) - InsertPt->eraseFromParent(); - Start->eraseFromParent(); + Updater->Remove(InsertPt); + Updater->Remove(Start); + Updater->Insert(&*MIB); LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB); return &*MIB; } @@ -633,51 +667,24 @@ LoLoop.VCTP->getOperand(1).dump()); // Find the definition we are interested in removing, if there is one. - MachineInstr *Def = RDA->getReachingMIDef(LastInstrInBlock, ElemCount); + MachineInstr *Def = Updater->getReachingDef(LastInstrInBlock, ElemCount); if (!Def) return; - // Bail if we define CPSR and it is not dead - if (!Def->registerDefIsDead(ARM::CPSR, TRI)) { - LLVM_DEBUG(dbgs() << "ARM Loops: CPSR is not dead\n"); - return; - } - - // Bail if elemcount is used in exit blocks, i.e. if it is live-in. - if (isRegLiveInExitBlocks(LoLoop.ML, ElemCount)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Elemcount is live-out, can't remove stmt\n"); + SmallPtrSet Ignore = { LoLoop.VCTP }; + if (!Updater->isSafeToRemove(Def, Ignore)) return; - } - // Bail if there are uses after this Def in the block. - SmallVector Uses; - RDA->getReachingLocalUses(Def, ElemCount, Uses); - if (Uses.size()) { - LLVM_DEBUG(dbgs() << "ARM Loops: Local uses in block, can't remove stmt\n"); - return; - } - - Uses.clear(); - RDA->getAllInstWithUseBefore(Def, ElemCount, Uses); - - // Remove Def if there are no uses, or if the only use is the VCTP - // instruction. - if (!Uses.size() || (Uses.size() == 1 && Uses[0] == LoLoop.VCTP)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Removing loop update instruction: "; - Def->dump()); - Def->eraseFromParent(); - } + Updater->Remove(Def); } void ARMLowOverheadLoops::RemoveVPTBlocks(LowOverheadLoop &LoLoop) { - LLVM_DEBUG(dbgs() << "ARM Loops: Removing VCTP: " << *LoLoop.VCTP); - LoLoop.VCTP->eraseFromParent(); + Updater->Remove(LoLoop.VCTP); for (auto *MI : LoLoop.VPTUsers) { - if (MI->getOpcode() == ARM::MVE_VPST) { - LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *MI); - MI->eraseFromParent(); - } else { + if (MI->getOpcode() == ARM::MVE_VPST) + Updater->Remove(MI); + else { unsigned OpNum = MI->getNumOperands() - 1; assert((MI->getOperand(OpNum).isReg() && MI->getOperand(OpNum).getReg() == ARM::VPR) && @@ -705,10 +712,10 @@ MIB.addDef(ARM::LR); MIB.add(End->getOperand(0)); MIB.add(End->getOperand(1)); - LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB); - LoLoop.End->eraseFromParent(); - LoLoop.Dec->eraseFromParent(); + Updater->Insert(&*MIB); + Updater->Remove(LoLoop.End); + Updater->Remove(LoLoop.Dec); return &*MIB; }; @@ -717,15 +724,13 @@ // instructions. // If there is an unconditional branch, after I, that just branches to the // next block, remove it. - auto RemoveDeadBranch = [](MachineInstr *I) { + auto RemoveDeadBranch = [this](MachineInstr *I) { MachineBasicBlock *BB = I->getParent(); MachineInstr *Terminator = &BB->instr_back(); if (Terminator->isUnconditionalBranch() && I != Terminator) { MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB(); - if (BB->isLayoutSuccessor(Succ)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator); - Terminator->eraseFromParent(); - } + if (BB->isLayoutSuccessor(Succ)) + Updater->Remove(Terminator); } }; @@ -734,7 +739,7 @@ RevertWhile(LoLoop.Start); else LoLoop.Start->eraseFromParent(); - bool FlagsAlreadySet = RevertLoopDec(LoLoop.Dec, true); + bool FlagsAlreadySet = RevertLoopDec(LoLoop.Dec); RevertLoopEnd(LoLoop.End, FlagsAlreadySet); } else { LoLoop.Start = ExpandLoopStart(LoLoop); Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update2.mir =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update2.mir +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update2.mir @@ -167,6 +167,7 @@ tB %bb.2, 14, $noreg bb.2.for.cond.cleanup: + liveins: $cpsr tPOP_RET 14, $noreg, def $r7, def $pc ... Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -388,7 +388,6 @@ ; CHECK-NEXT: vldrb.u32 q1, [r5] ; CHECK-NEXT: vmul.i32 q0, q1, q0 ; CHECK-NEXT: adds r4, #4 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vadd.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB5_5 @@ -593,7 +592,6 @@ ; CHECK-NEXT: vldrh.s32 q0, [r0], #8 ; CHECK-NEXT: vldrh.s32 q1, [r1], #8 ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vadd.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB6_1 @@ -685,7 +683,6 @@ ; CHECK-NEXT: vldrb.u32 q1, [r5] ; CHECK-NEXT: vmul.i32 q0, q1, q0 ; CHECK-NEXT: adds r4, #4 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vadd.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB7_5 @@ -890,7 +887,6 @@ ; CHECK-NEXT: vldrh.u32 q0, [r0], #8 ; CHECK-NEXT: vldrh.u32 q1, [r1], #8 ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vadd.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB8_1 @@ -980,7 +976,6 @@ ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vadd.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB9_5 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir @@ -210,7 +210,6 @@ ; CHECK: renamable $q1 = MVE_VLDRBU8 killed renamable $r4, 0, 0, $noreg :: (load 16 from %ir.scevgep23, align 1) ; CHECK: renamable $r4 = t2ADDrr renamable $r0, renamable $r12, 14, $noreg, $noreg ; CHECK: renamable $r12 = t2ADDri killed renamable $r12, 16, 14, $noreg, $noreg - ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 16, 14, $noreg ; CHECK: renamable $q0 = MVE_VMULi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VSTRBU8 killed renamable $q0, killed renamable $r4, 0, 0, killed $noreg :: (store 16 into %ir.scevgep1, align 1) ; CHECK: $lr = MVE_LETP renamable $lr, %bb.2 @@ -330,7 +329,6 @@ ; CHECK: renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14, $noreg ; CHECK: renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 16, 14, $noreg ; CHECK: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14, $noreg - ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14, $noreg ; CHECK: $lr = MVE_LETP renamable $lr, %bb.1 ; CHECK: bb.2.for.cond.cleanup: ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc