Index: llvm/include/llvm/CodeGen/MachinePostRAUpdater.h =================================================================== --- /dev/null +++ llvm/include/llvm/CodeGen/MachinePostRAUpdater.h @@ -0,0 +1,100 @@ +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/ReachingDefAnalysis.h" + +namespace llvm { + +class MachineBasicBlock; +class MachinePostRAUpdater; +class ReachingDefAnalysis; + +class ModifiedBlock { + friend class MachinePostRAUpdater; + +public: + ModifiedBlock() { } + ModifiedBlock(MachineInstr *MI) { add(MI); } + +private: + std::map> NewDefs; + std::map> NewUses; + SmallPtrSet DeadInsts; + SmallPtrSet NewInsts; + + void remove(MachineInstr *MI); + void add(MachineInstr *MI); + void eraseDead(); + bool hasNewDef(int PhysReg) const { return NewDefs.count(PhysReg); } + bool hasNewUse(int PhysReg) const { return NewUses.count(PhysReg); } + bool isDead(MachineInstr *MI) const { return DeadInsts.count(MI); } + const SmallPtrSetImpl &getNewDefs(int PhysReg) const { + return NewDefs.at(PhysReg); + } + const SmallPtrSetImpl &getNewUses(int PhysReg) const { + return NewUses.at(PhysReg); + } +}; + +class MachinePostRAUpdater { + ReachingDefAnalysis &RDA; + std::map ModifiedBlocks; + + template + bool isSafeToMove(MachineInstr *From, MachineInstr *To); + +public: + MachinePostRAUpdater() = delete; + MachinePostRAUpdater(ReachingDefAnalysis &RDA) : RDA(RDA) { } + /// Return whether From can be moved forwards to just before To. + bool isSafeToMoveForwards(MachineInstr *From, MachineInstr *To); + /// Return whether From can be moved backwards to just after To. + bool isSafeToMoveBackwards(MachineInstr *From, MachineInstr *To); + /// Return whether removing this instruction will have no effect on the + /// program. + bool isSafeToRemove(MachineInstr *MI, + SmallPtrSetImpl &ToRemove) const; + /// Return whether removing this instruction will have no effect on the + /// program, ignoring the possible effects on some instructions. + bool isSafeToRemove(MachineInstr *MI, + SmallPtrSetImpl &ToRemove, + SmallPtrSetImpl &Ignore) const; + /// Return whether defining the given register at MI would have no effect + /// on the program. + bool isSafeToDefRegAt(MachineInstr *MI, int PhysReg) const; + /// Return whether defining the given register at MI would have no effect + /// on the program, ignoring the possible effects on instructions in Ignore. + bool isSafeToDefRegAt(MachineInstr *MI, int PhysReg, + SmallPtrSetImpl &Ignore) const; + /// Return whether an instruction has been created and inserted before + /// MI which defines the given register. + bool hasNewDefBefore(MachineInstr *MI, int PhysReg) const; + /// Return whether the given register is defined within the block. + bool isDefinedInBlock(MachineBasicBlock *MBB, int PhysReg) const; + /// Return whether the given register is defined within MIs parent block + /// but before MI. + bool hasLocalDefBefore(MachineInstr *MI, int PhysReg) const; + /// Return whether we can prove that A and B use the same value in the + /// given register. + bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, + int PhysReg) const; + /// Return the instruction which defines the register and provides the value + /// that lives out of the block, or nullptr for a non-local or newly added + /// definition. + MachineInstr *getLocalLiveOutDef(MachineBasicBlock *MBB, int PhysReg) const; + /// Return whether the value in PhysReg, used by MI, also lives out of the + /// basic block. + bool isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const; + /// Return the local MachineInstr that is the reaching def for MI, or nullptr + /// for a non-local definition. + MachineInstr *getReachingDef(MachineInstr *MI, int PhysReg); + + void MoveBefore(MachineInstr *From, MachineInstr *To); + void MoveAfter(MachineInstr *From, MachineInstr *To); + void Replace(MachineInstr *From, MachineInstr *To); + void Insert(MachineInstr *MI); + void Remove(MachineInstr *MI); + void Finalize(); +}; + +} // end namespace llvm + Index: llvm/include/llvm/CodeGen/ReachingDefAnalysis.h =================================================================== --- llvm/include/llvm/CodeGen/ReachingDefAnalysis.h +++ llvm/include/llvm/CodeGen/ReachingDefAnalysis.h @@ -93,29 +93,32 @@ /// Provides the instruction id of the closest reaching def instruction of /// PhysReg that reaches MI, relative to the begining of MI's basic block. - int getReachingDef(MachineInstr *MI, int PhysReg); + int getReachingDef(MachineInstr *MI, int PhysReg) const; /// Provides the instruction of the closest reaching def instruction of /// PhysReg that reaches MI, relative to the begining of MI's basic block. - MachineInstr *getReachingMIDef(MachineInstr *MI, int PhysReg); + MachineInstr *getReachingMIDef(MachineInstr *MI, int PhysReg) const; /// Provides the MI, from the given block, corresponding to the Id or a /// nullptr if the id does not refer to the block. - MachineInstr *getInstFromId(MachineBasicBlock *MBB, int InstId); + MachineInstr *getInstFromId(MachineBasicBlock *MBB, int InstId) const; /// Return whether A and B use the same def of PhysReg. bool hasSameReachingDef(MachineInstr *A, MachineInstr *B, int PhysReg); /// Return whether the given register is used after MI, whether it's a local /// use or a live out. - bool isRegUsedAfter(MachineInstr *MI, int PhysReg); + bool isRegUsedAfter(MachineInstr *MI, int PhysReg) const; + + /// Return whether the given register is defined after MI. + bool isRegDefinedAfter(MachineInstr *MI, int PhysReg) const; /// Provides the first instruction before MI that uses PhysReg MachineInstr *getInstWithUseBefore(MachineInstr *MI, int PhysReg); /// Provides all instructions before MI that uses PhysReg void getAllInstWithUseBefore(MachineInstr *MI, int PhysReg, - SmallVectorImpl &Uses); + SmallPtrSetImpl &Uses); /// Provides the clearance - the number of instructions since the closest /// reaching def instuction of PhysReg that reaches MI. @@ -124,11 +127,11 @@ /// Provides the uses, in the same block as MI, of register that MI defines. /// This does not consider live-outs. void getReachingLocalUses(MachineInstr *MI, int PhysReg, - SmallVectorImpl &Uses); + SmallPtrSetImpl &Uses); - /// Provide the number of uses, in the same block as MI, of the register that - /// MI defines. - unsigned getNumUses(MachineInstr *MI, int PhysReg); + bool isReachingDefLiveOut(MachineInstr *MI, int PhysReg) const; + MachineInstr* getLocalLiveOutMIDef(MachineBasicBlock *MBB, + int PhysReg) const; private: /// Set up LiveRegs by merging predecessor live-out values. Index: llvm/lib/CodeGen/CMakeLists.txt =================================================================== --- llvm/lib/CodeGen/CMakeLists.txt +++ llvm/lib/CodeGen/CMakeLists.txt @@ -89,6 +89,7 @@ MachineOutliner.cpp MachinePipeliner.cpp MachinePostDominators.cpp + MachinePostRAUpdater.cpp MachineRegionInfo.cpp MachineRegisterInfo.cpp MachineScheduler.cpp Index: llvm/lib/CodeGen/MachinePostRAUpdater.cpp =================================================================== --- /dev/null +++ llvm/lib/CodeGen/MachinePostRAUpdater.cpp @@ -0,0 +1,306 @@ +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachinePostRAUpdater.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "post-ra-updater" + +using namespace llvm; + +void ModifiedBlock::eraseDead() { + for (auto *MI : DeadInsts) { + LLVM_DEBUG(dbgs() << "PostRA Updater: Erasing " << *MI); + MI->eraseFromParent(); + } +} + +void ModifiedBlock::add(MachineInstr *MI) { + LLVM_DEBUG(dbgs() << "PostRA Updater: Adding " << *MI); + NewInsts.insert(MI); + for (auto &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (MO.isDef()) + NewDefs[MO.getReg()].insert(MI); + if (MO.isUse()) + NewUses[MO.getReg()].insert(MI); + } +} + +void ModifiedBlock::remove(MachineInstr *MI) { + LLVM_DEBUG(dbgs() << "PostRA Updater: Removing " << *MI); + DeadInsts.insert(MI); + if (NewInsts.count(MI)) + NewInsts.erase(MI); +} + +// Can we safely move 'From' to just before 'To'? To satisfy this, 'From' must +// not define a register that is used by any instructions, after and including, +// 'To'. These instructions also must not redefine any of Froms operands. +template +bool MachinePostRAUpdater::isSafeToMove(MachineInstr *From, MachineInstr *To) { + if (From->getParent() != To->getParent()) + return false; + if (ModifiedBlocks.count(From->getParent())) + return false; + + SmallSet Defs; + // First check that From would compute the same value if moved. + for (auto &MO : From->operands()) { + if (!MO.isReg() || MO.isUndef() || !MO.getReg()) + continue; + if (MO.isDef()) + Defs.insert(MO.getReg()); + else if (!RDA.hasSameReachingDef(From, To, MO.getReg())) + return false; + } + + // Now walk checking that the rest of the instructions will compute the same + // value. + for (auto I = ++Iterator(From), E = Iterator(To); I != E; ++I) { + for (auto &MO : I->operands()) + if (MO.isReg() && MO.getReg() && MO.isUse() && Defs.count(MO.getReg())) + return false; + } + return true; +} + +bool MachinePostRAUpdater::isSafeToMoveForwards(MachineInstr *From, + MachineInstr *To) { + return isSafeToMove(From, To); +} + +bool MachinePostRAUpdater::isSafeToMoveBackwards(MachineInstr *From, + MachineInstr *To) { + return isSafeToMove(From, To); +} + +bool +MachinePostRAUpdater::isSafeToRemove(MachineInstr *MI, + SmallPtrSetImpl &ToRemove) const { + SmallPtrSet Ignore; + return isSafeToRemove(MI, ToRemove, Ignore); +} + +bool +MachinePostRAUpdater::isSafeToRemove(MachineInstr *MI, + SmallPtrSetImpl &ToRemove, + SmallPtrSetImpl &Ignore) const { + LLVM_DEBUG(dbgs() << "PostRA Updater: Is safe to remove " << *MI); + + auto getLocalUses = [this](MachineInstr *MI, + SmallPtrSetImpl &Uses) { + MachineBasicBlock *MBB = MI->getParent(); + for (auto &MO : MI->operands()) { + if (!MO.isReg() || !MO.isDef()) + continue; + + // Collect the uses that each def touches within the block. + RDA.getReachingLocalUses(MI, MO.getReg(), Uses); + + // If the value is live-out, check if this block is a single-block loop, + // and if so, then check the users before MI. If we find that the value + // if live-out in any other block then we can't remove it. + if (auto *LiveOut = getLocalLiveOutDef(MI->getParent(), MO.getReg())) { + if (LiveOut != MI) + continue; + + for (auto SuccBB : MBB->successors()) { + if (!SuccBB->isLiveIn(MO.getReg())) + continue; + + if (SuccBB == MBB) + RDA.getAllInstWithUseBefore(MI, MO.getReg(), Uses); + else { + LLVM_DEBUG(dbgs() << " - No, it has a live out into " + << SuccBB->getName() << "\n"); + return false; + } + } + } + } + return true; + }; + + // Unless told to ignore the instruction, don't remove anything which has + // side effects. + if ((MI->mayLoadOrStore() || MI->hasUnmodeledSideEffects()) && + !Ignore.count(MI)) { + LLVM_DEBUG(dbgs() << " - No, it has side effects.\n"); + return false; + } + + ToRemove.insert(MI); + + SmallPtrSet Uses; + if (!getLocalUses(MI, Uses)) + return false; + + for (auto I : Uses) { + if (Ignore.count(I) || ToRemove.count(I)) + continue; + if (!isSafeToRemove(I, ToRemove, Ignore)) + return false; + } + LLVM_DEBUG(dbgs() << " - Yes.\n"); + return true; +} + +void MachinePostRAUpdater::MoveAfter(MachineInstr *From, MachineInstr *To) { + assert(From->getParent() == To->getParent() && + "Can't move instructions between blocks"); + MachineBasicBlock *MBB = From->getParent(); + From->removeFromParent(); + MBB->insertAfter(MachineBasicBlock::iterator(To), From); +} + +void MachinePostRAUpdater::MoveBefore(MachineInstr *From, MachineInstr *To) { + assert(From->getParent() == To->getParent() && + "Can't move instructions between blocks"); + MachineBasicBlock *MBB = From->getParent(); + From->removeFromParent(); + MBB->insert(MachineBasicBlock::iterator(To), From); +} + +bool +MachinePostRAUpdater::isSafeToDefRegAt(MachineInstr *MI, int PhysReg) const { + SmallPtrSet Ignore; + return isSafeToDefRegAt(MI, PhysReg, Ignore); +} + +bool +MachinePostRAUpdater::isSafeToDefRegAt(MachineInstr *MI, int PhysReg, + SmallPtrSetImpl &Ignore) const { + MachineBasicBlock *MBB = MI->getParent(); + if (ModifiedBlocks.count(MBB) && ModifiedBlocks.at(MBB).hasNewDef(PhysReg)) + return false; + + // Check for any uses of the register after MI. + if (RDA.isRegUsedAfter(MI, PhysReg)) { + if (auto *Def = RDA.getReachingMIDef(MI, PhysReg)) { + SmallPtrSet Uses; + RDA.getReachingLocalUses(Def, PhysReg, Uses); + for (auto *Use : Uses) + if (!Ignore.count(Use)) + return false; + } else + return false; + } + + // Check for any defs after MI. + if (RDA.isRegDefinedAfter(MI, PhysReg)) { + auto I = MachineBasicBlock::iterator(MI); + for (auto E = MBB->end(); I != E; ++I) { + if (Ignore.count(&*I)) + continue; + for (auto &MO : I->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + return false; + } + } + return true; +} + +bool +MachinePostRAUpdater::hasNewDefBefore(MachineInstr *MI, int PhysReg) const { + auto isBefore = [](MachineInstr *A, MachineInstr *B) { + return std::distance(MachineBasicBlock::iterator(A), + MachineBasicBlock::iterator(B)) > 0; + }; + + MachineBasicBlock *MBB = MI->getParent(); + if (!ModifiedBlocks.count(MBB)) + return false; + if (!ModifiedBlocks.at(MBB).hasNewDef(PhysReg)) + return false; + for (auto *NewDef : ModifiedBlocks.at(MBB).getNewDefs(PhysReg)) + if (isBefore(NewDef, MI)) + return true; + return false; +} + +bool +MachinePostRAUpdater::hasLocalDefBefore(MachineInstr *MI, int PhysReg) const { + if (hasNewDefBefore(MI, PhysReg)) + return true; + return RDA.getReachingDef(MI, PhysReg) >= 0; +} + +bool MachinePostRAUpdater::isDefinedInBlock(MachineBasicBlock *MBB, + int PhysReg) const { + return hasLocalDefBefore(&MBB->back(), PhysReg) || + !isReachingDefLiveOut(&MBB->back(), PhysReg); +} + +void MachinePostRAUpdater::Finalize() { + LLVM_DEBUG(dbgs() << "PostRA Updater: Finalizing changes...\n"); + for (auto &Pair : ModifiedBlocks) + Pair.second.eraseDead(); +} + +void MachinePostRAUpdater::Replace(MachineInstr *Old, MachineInstr *New) { + assert(Old->getParent() == New->getParent() && + "Can't replace instructions in different blocks"); + LLVM_DEBUG(dbgs() << "PostRA Updater:\n- Replacing " << *Old + << "- With " << *New); + Insert(New); + Remove(Old); +} + +void MachinePostRAUpdater::Insert(MachineInstr *MI) { + MachineBasicBlock *MBB = MI->getParent(); + if (ModifiedBlocks.count(MBB)) + ModifiedBlocks.at(MBB).add(MI); + else + ModifiedBlocks.emplace(MBB, ModifiedBlock(MI)); +} + +void MachinePostRAUpdater::Remove(MachineInstr *MI) { + MachineBasicBlock *MBB = MI->getParent(); + if (ModifiedBlocks.count(MBB)) + ModifiedBlocks.at(MBB).remove(MI); + else { + ModifiedBlocks.emplace(MBB, ModifiedBlock()); + ModifiedBlocks.at(MBB).remove(MI); + } +} + +bool +MachinePostRAUpdater::hasSameReachingDef(MachineInstr *A, MachineInstr *B, + int PhysReg) const { + // For now, just bail if we find a new def. + if ((ModifiedBlocks.count(A->getParent()) && + ModifiedBlocks.at(A->getParent()).hasNewDef(PhysReg)) || + (ModifiedBlocks.count(B->getParent()) && + ModifiedBlocks.at(B->getParent()).hasNewDef(PhysReg))) + return false; + return RDA.hasSameReachingDef(A, B, PhysReg); +} + +MachineInstr* MachinePostRAUpdater::getReachingDef(MachineInstr *MI, + int PhysReg) { + MachineBasicBlock *MBB = MI->getParent(); + if (hasNewDefBefore(MI, PhysReg)) + return nullptr; + + MachineInstr *Def = RDA.getReachingMIDef(MI, PhysReg); + if (ModifiedBlocks.count(MBB) && ModifiedBlocks.at(MBB).isDead(Def)) + return nullptr; + return Def; +} + +MachineInstr* MachinePostRAUpdater::getLocalLiveOutDef(MachineBasicBlock *MBB, + int PhysReg) const { + if (ModifiedBlocks.count(MBB) && ModifiedBlocks.at(MBB).hasNewDef(PhysReg)) + return nullptr; + return RDA.getLocalLiveOutMIDef(MBB, PhysReg); +} + +bool MachinePostRAUpdater::isReachingDefLiveOut(MachineInstr *MI, + int PhysReg) const { + MachineBasicBlock *MBB = MI->getParent(); + if (ModifiedBlocks.count(MBB) && + ModifiedBlocks.at(MBB).hasNewDef(PhysReg)) + return false; + return RDA.isReachingDefLiveOut(MI, PhysReg); +} Index: llvm/lib/CodeGen/ReachingDefAnalysis.cpp =================================================================== --- llvm/lib/CodeGen/ReachingDefAnalysis.cpp +++ llvm/lib/CodeGen/ReachingDefAnalysis.cpp @@ -171,9 +171,9 @@ InstIds.clear(); } -int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) { +int ReachingDefAnalysis::getReachingDef(MachineInstr *MI, int PhysReg) const { assert(InstIds.count(MI) && "Unexpected machine instuction."); - int InstId = InstIds[MI]; + int InstId = InstIds.lookup(MI); int DefRes = ReachingDefDefaultVal; unsigned MBBNumber = MI->getParent()->getNumber(); assert(MBBNumber < MBBReachingDefs.size() && @@ -190,7 +190,8 @@ return LatestDef; } -MachineInstr* ReachingDefAnalysis::getReachingMIDef(MachineInstr *MI, int PhysReg) { +MachineInstr* ReachingDefAnalysis::getReachingMIDef(MachineInstr *MI, + int PhysReg) const { return getInstFromId(MI->getParent(), getReachingDef(MI, PhysReg)); } @@ -205,7 +206,7 @@ } MachineInstr *ReachingDefAnalysis::getInstFromId(MachineBasicBlock *MBB, - int InstId) { + int InstId) const { assert(static_cast(MBB->getNumber()) < MBBReachingDefs.size() && "Unexpected basic block number."); assert(InstId < static_cast(MBB->size()) && @@ -215,7 +216,7 @@ return nullptr; for (auto &MI : *MBB) { - if (InstIds.count(&MI) && InstIds[&MI] == InstId) + if (InstIds.count(&MI) && InstIds.lookup(&MI) == InstId) return &MI; } return nullptr; @@ -227,7 +228,7 @@ } void ReachingDefAnalysis::getReachingLocalUses(MachineInstr *Def, int PhysReg, - SmallVectorImpl &Uses) { + SmallPtrSetImpl &Uses) { MachineBasicBlock *MBB = Def->getParent(); MachineBasicBlock::iterator MI = MachineBasicBlock::iterator(Def); while (++MI != MBB->end()) { @@ -240,20 +241,14 @@ if (getReachingMIDef(&*MI, PhysReg) != Def) return; - Uses.push_back(&*MI); + Uses.insert(&*MI); if (MO.isKill()) return; } } } -unsigned ReachingDefAnalysis::getNumUses(MachineInstr *Def, int PhysReg) { - SmallVector Uses; - getReachingLocalUses(Def, PhysReg, Uses); - return Uses.size(); -} - -bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) { +bool ReachingDefAnalysis::isRegUsedAfter(MachineInstr *MI, int PhysReg) const { MachineBasicBlock *MBB = MI->getParent(); LivePhysRegs LiveRegs(*TRI); LiveRegs.addLiveOuts(*MBB); @@ -267,11 +262,23 @@ for (auto Last = MBB->rbegin(), End = MBB->rend(); Last != End; ++Last) { LiveRegs.stepBackward(*Last); if (LiveRegs.contains(PhysReg)) - return InstIds[&*Last] > InstIds[MI]; + return InstIds.lookup(&*Last) > InstIds.lookup(MI); } return false; } +bool ReachingDefAnalysis::isRegDefinedAfter(MachineInstr *MI, + int PhysReg) const { + MachineBasicBlock *MBB = MI->getParent(); + if (getReachingDef(MI, PhysReg) != getReachingDef(&MBB->back(), PhysReg)) + return true; + + if (auto *Def = getLocalLiveOutMIDef(MBB, PhysReg)) + return Def == getReachingMIDef(MI, PhysReg); + + return false; +} + MachineInstr *ReachingDefAnalysis::getInstWithUseBefore(MachineInstr *MI, int PhysReg) { auto I = MachineBasicBlock::reverse_iterator(MI); @@ -287,12 +294,49 @@ } void ReachingDefAnalysis::getAllInstWithUseBefore(MachineInstr *MI, - int PhysReg, SmallVectorImpl &Uses) { + int PhysReg, SmallPtrSetImpl &Uses) { MachineInstr *Use = nullptr; MachineInstr *Pos = MI; while ((Use = getInstWithUseBefore(Pos, PhysReg))) { - Uses.push_back(Use); + Uses.insert(Use); Pos = Use; } } + +bool ReachingDefAnalysis::isReachingDefLiveOut(MachineInstr *MI, + int PhysReg) const { + MachineBasicBlock *MBB = MI->getParent(); + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + if (!LiveRegs.contains(PhysReg)) + return false; + + MachineInstr *Last = &MBB->back(); + int Def = getReachingDef(MI, PhysReg); + if (getReachingDef(Last, PhysReg) != Def) + return false; + + // Finally check that the last instruction doesn't redefine the register. + for (auto &MO : Last->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + return false; + + return true; +} + +MachineInstr* ReachingDefAnalysis::getLocalLiveOutMIDef(MachineBasicBlock *MBB, + int PhysReg) const { + LivePhysRegs LiveRegs(*TRI); + LiveRegs.addLiveOuts(*MBB); + if (!LiveRegs.contains(PhysReg)) + return nullptr; + + MachineInstr *Last = &MBB->back(); + int Def = getReachingDef(Last, PhysReg); + for (auto &MO : Last->operands()) + if (MO.isReg() && MO.isDef() && MO.getReg() == PhysReg) + return Last; + + return Def < 0 ? nullptr : getInstFromId(MBB, Def); +} Index: llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp =================================================================== --- llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp +++ llvm/lib/Target/ARM/ARMLowOverheadLoops.cpp @@ -26,8 +26,8 @@ #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineLoopUtils.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachinePostRAUpdater.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/ReachingDefAnalysis.h" #include "llvm/MC/MCInstrDesc.h" using namespace llvm; @@ -47,6 +47,7 @@ MachineInstr *End = nullptr; MachineInstr *VCTP = nullptr; SmallVector VPTUsers; + SmallPtrSet ToRemove; bool Revert = false; bool FoundOneVCTP = false; bool CannotTailPredicate = false; @@ -107,11 +108,12 @@ // Is it safe to define LR with DLS/WLS? // LR can be defined if it is the operand to start, because it's the same // value, or if it's going to be equivalent to the operand to Start. - MachineInstr *IsSafeToDefineLR(ReachingDefAnalysis *RDA); + MachineInstr *isSafeToDefineLR(MachinePostRAUpdater *Updater); // Check the branch targets are within range and we satisfy our // restrictions. - void CheckLegality(ARMBasicBlockUtils *BBUtils, ReachingDefAnalysis *RDA, + void CheckLegality(ARMBasicBlockUtils *BBUtils, + MachinePostRAUpdater *Updater, MachineLoopInfo *MLI); bool FoundAllComponents() const { @@ -148,7 +150,7 @@ class ARMLowOverheadLoops : public MachineFunctionPass { MachineFunction *MF = nullptr; MachineLoopInfo *MLI = nullptr; - ReachingDefAnalysis *RDA = nullptr; + std::unique_ptr Updater; const ARMBaseInstrInfo *TII = nullptr; MachineRegisterInfo *MRI = nullptr; const TargetRegisterInfo *TRI = nullptr; @@ -185,7 +187,7 @@ void RevertWhile(MachineInstr *MI) const; - bool RevertLoopDec(MachineInstr *MI, bool AllowFlags = false) const; + bool RevertLoopDec(MachineInstr *MI) const; void RevertLoopEnd(MachineInstr *MI, bool SkipCmp = false) const; @@ -205,7 +207,7 @@ INITIALIZE_PASS(ARMLowOverheadLoops, DEBUG_TYPE, ARM_LOW_OVERHEAD_LOOPS_NAME, false, false) -MachineInstr *LowOverheadLoop::IsSafeToDefineLR(ReachingDefAnalysis *RDA) { +MachineInstr *LowOverheadLoop::isSafeToDefineLR(MachinePostRAUpdater *Updater) { // We can define LR because LR already contains the same value. if (Start->getOperand(0).getReg() == ARM::LR) return Start; @@ -223,27 +225,41 @@ // Find an insertion point: // - Is there a (mov lr, Count) before Start? If so, and nothing else writes // to Count before Start, we can insert at that mov. + if (auto *LRDef = Updater->getReachingDef(Start, ARM::LR)) + if (IsMoveLR(LRDef) && Updater->hasSameReachingDef(Start, LRDef, CountReg)) + return LRDef; + // - Is there a (mov lr, Count) after Start? If so, and nothing else writes // to Count after Start, we can insert at that mov. - if (auto *LRDef = RDA->getReachingMIDef(&MBB->back(), ARM::LR)) { - if (IsMoveLR(LRDef) && RDA->hasSameReachingDef(Start, LRDef, CountReg)) + if (auto *LRDef = Updater->getLocalLiveOutDef(MBB, ARM::LR)) + if (IsMoveLR(LRDef) && Updater->hasSameReachingDef(Start, LRDef, CountReg)) return LRDef; - } // We've found no suitable LR def and Start doesn't use LR directly. Can we // just define LR anyway? - if (!RDA->isRegUsedAfter(Start, ARM::LR)) + if (Updater->isSafeToDefRegAt(Start, ARM::LR)) return Start; return nullptr; } void LowOverheadLoop::CheckLegality(ARMBasicBlockUtils *BBUtils, - ReachingDefAnalysis *RDA, + MachinePostRAUpdater *Updater, MachineLoopInfo *MLI) { if (Revert) return; + SmallPtrSet Ignore { Dec, End }; + if (!Updater->isSafeToRemove(Dec, ToRemove, Ignore)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Can't remove iteration count chain.\n"); + Revert = true; + return; + } else { + LLVM_DEBUG(dbgs() << "ARM Loops: Will remove:\n"; + for (auto *I : ToRemove) + dbgs() << " - " << *I;); + } + if (!End->getOperand(1).isMBB()) report_fatal_error("Expected LoopEnd to target basic block"); @@ -273,7 +289,7 @@ return; } - InsertPt = Revert ? nullptr : IsSafeToDefineLR(RDA); + InsertPt = Revert ? nullptr : isSafeToDefineLR(Updater); if (!InsertPt) { LLVM_DEBUG(dbgs() << "ARM Loops: Unable to find safe insertion point.\n"); Revert = true; @@ -293,28 +309,46 @@ // If the register is defined within loop, then we can't perform TP. // TODO: Check whether this is just a mov of a register that would be // available. - if (RDA->getReachingDef(VCTP, NumElements) >= 0) { + if (Updater->hasLocalDefBefore(VCTP, NumElements)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Found in-loop element count def.\n"); CannotTailPredicate = true; return; } - // We can't perform TP if the register does not hold the same value at - // InsertPt as the liveout value. + // The element count register maybe defined after InsertPt, in which case we + // need to try to move either InsertPt or the def so that the [w|d]lstp can + // use the value. MachineBasicBlock *InsertBB = InsertPt->getParent(); - if (!RDA->hasSameReachingDef(InsertPt, &InsertBB->back(), - NumElements)) { - CannotTailPredicate = true; - return; + if (!Updater->isReachingDefLiveOut(InsertPt, NumElements)) { + if (auto *ElemDef = Updater->getLocalLiveOutDef(InsertBB, NumElements)) { + if (Updater->isSafeToMoveForwards(ElemDef, InsertPt)) { + Updater->MoveBefore(ElemDef, InsertPt); + LLVM_DEBUG(dbgs() << "ARM Loops: Moved element count def: " + << *ElemDef); + } else if (Updater->isSafeToMoveBackwards(InsertPt, ElemDef)) { + Updater->MoveAfter(ElemDef, InsertPt); + LLVM_DEBUG(dbgs() << "ARM Loops: Moved start past: " << *ElemDef); + } else { + LLVM_DEBUG(dbgs() << "ARM Loops: Unable to use element count in:\n" + << *InsertBB); + CannotTailPredicate = true; + return; + } + } } // Especially in the case of while loops, InsertBB may not be the // preheader, so we need to check that the register isn't redefined // before entering the loop. - auto CannotProvideElements = [&RDA](MachineBasicBlock *MBB, - Register NumElements) { - // NumElements is redefined in this block. - if (RDA->getReachingDef(&MBB->back(), NumElements) >= 0) + auto CannotProvideElements = [&Updater](MachineBasicBlock *MBB, + Register NumElements) { + if (Updater->isDefinedInBlock(MBB, NumElements)) { + LLVM_DEBUG(dbgs() << "ARM Loops: Found a def of element count:\n"; + if (auto *Def = Updater->getReachingDef(&MBB->back(), + NumElements)) + dbgs() << " - " << *Def); return true; + } // Don't continue searching up through multiple predecessors. if (MBB->pred_size() > 1) @@ -333,8 +367,11 @@ // Then search backwards for a def, until we get to InsertBB. while (MBB != InsertBB) { CannotTailPredicate = CannotProvideElements(MBB, NumElements); - if (CannotTailPredicate) + if (CannotTailPredicate) { + LLVM_DEBUG(dbgs() << "ARM Loops: Unable to use element count in:\n" + << *MBB); return; + } MBB = *MBB->pred_begin(); } @@ -352,7 +389,8 @@ LLVM_DEBUG(dbgs() << "ARM Loops on " << MF->getName() << " ------------- \n"); MLI = &getAnalysis(); - RDA = &getAnalysis(); + Updater = std::make_unique( + getAnalysis()); MF->getProperties().set(MachineFunctionProperties::Property::TracksLiveness); MRI = &MF->getRegInfo(); TII = static_cast(ST.getInstrInfo()); @@ -366,6 +404,7 @@ if (!ML->getParentLoop()) Changed |= ProcessLoop(ML); } + Updater->Finalize(); Changed |= RevertNonLoops(); return Changed; } @@ -438,28 +477,6 @@ // Check we know how to tail predicate any mve instructions. LoLoop.CheckTPValidity(&MI); } - - // We need to ensure that LR is not used or defined inbetween LoopDec and - // LoopEnd. - if (!LoLoop.Dec || LoLoop.End || LoLoop.Revert) - continue; - - // If we find that LR has been written or read between LoopDec and - // LoopEnd, expect that the decremented value is being used else where. - // Because this value isn't actually going to be produced until the - // latch, by LE, we would need to generate a real sub. The value is also - // likely to be copied/reloaded for use of LoopEnd - in which in case - // we'd need to perform an add because it gets subtracted again by LE! - // The other option is to then generate the other form of LE which doesn't - // perform the sub. - for (auto &MO : MI.operands()) { - if (MI.getOpcode() != ARM::t2LoopDec && MO.isReg() && - MO.getReg() == ARM::LR) { - LLVM_DEBUG(dbgs() << "ARM Loops: Found LR Use/Def: " << MI); - LoLoop.Revert = true; - break; - } - } } } @@ -467,7 +484,7 @@ if (!LoLoop.FoundAllComponents()) return false; - LoLoop.CheckLegality(BBUtils.get(), RDA, MLI); + LoLoop.CheckLegality(BBUtils.get(), Updater.get(), MLI); Expand(LoLoop); return true; } @@ -485,6 +502,7 @@ MIB.addImm(0); MIB.addImm(ARMCC::AL); MIB.addReg(ARM::NoRegister); + Updater->Insert(&*MIB); MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); unsigned BrOpc = BBUtils->isBBInRange(MI, DestBB, 254) ? @@ -494,19 +512,21 @@ MIB.add(MI->getOperand(1)); // branch target MIB.addImm(ARMCC::EQ); // condition code MIB.addReg(ARM::CPSR); - MI->eraseFromParent(); + Updater->Insert(&*MIB); + Updater->Remove(MI); } -bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI, - bool SetFlags) const { +bool ARMLowOverheadLoops::RevertLoopDec(MachineInstr *MI) const { LLVM_DEBUG(dbgs() << "ARM Loops: Reverting to sub: " << *MI); MachineBasicBlock *MBB = MI->getParent(); + MachineInstr *Last = &MBB->back(); + + SmallPtrSet Ignore; + if (Last->getOpcode() == ARM::t2LoopEnd) + Ignore.insert(Last); // If nothing defines CPSR between LoopDec and LoopEnd, use a t2SUBS. - if (SetFlags && - (RDA->isRegUsedAfter(MI, ARM::CPSR) || - !RDA->hasSameReachingDef(MI, &MBB->back(), ARM::CPSR))) - SetFlags = false; + bool SetFlags = Updater->isSafeToDefRegAt(MI, ARM::CPSR, Ignore); MachineInstrBuilder MIB = BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); @@ -522,7 +542,8 @@ } else MIB.addReg(0); - MI->eraseFromParent(); + Updater->Insert(&*MIB); + Updater->Remove(MI); return SetFlags; } @@ -539,6 +560,7 @@ MIB.addImm(0); MIB.addImm(ARMCC::AL); MIB.addReg(ARM::NoRegister); + Updater->Insert(&*MIB); } MachineBasicBlock *DestBB = MI->getOperand(1).getMBB(); @@ -551,7 +573,8 @@ MIB.add(MI->getOperand(1)); // branch target MIB.addImm(ARMCC::NE); // condition code MIB.addReg(ARM::CPSR); - MI->eraseFromParent(); + Updater->Insert(&*MIB); + Updater->Remove(MI); } MachineInstr* ARMLowOverheadLoops::ExpandLoopStart(LowOverheadLoop &LoLoop) { @@ -572,35 +595,37 @@ // When using tail-predication, try to delete the dead code that was used to // calculate the number of loop iterations. + // TODO: Extend MachinePostRAUpdater::isSafeToRemove to look into other + // blocks. if (LoLoop.IsTailPredicationLegal()) { SmallVector Killed; - SmallVector Dead; - if (auto *Def = RDA->getReachingMIDef(Start, - Start->getOperand(0).getReg())) { + SmallPtrSet Dead; + if (auto *Def = Updater->getReachingDef(Start, + Start->getOperand(0).getReg())) { Killed.push_back(Def); - while (!Killed.empty()) { MachineInstr *Def = Killed.back(); Killed.pop_back(); - Dead.push_back(Def); + Dead.insert(Def); for (auto &MO : Def->operands()) { if (!MO.isReg() || !MO.isKill()) continue; - MachineInstr *Kill = RDA->getReachingMIDef(Def, MO.getReg()); - if (Kill && RDA->getNumUses(Kill, MO.getReg()) == 1) - Killed.push_back(Kill); + if (MachineInstr *Kill = Updater->getReachingDef(Def, MO.getReg())) + if (Updater->isSafeToRemove(Kill, Dead)) + Killed.push_back(Kill); } } for (auto *MI : Dead) - MI->eraseFromParent(); + Updater->Remove(MI); } } // If we're inserting at a mov lr, then remove it as it's redundant. if (InsertPt != Start) - InsertPt->eraseFromParent(); - Start->eraseFromParent(); + Updater->Remove(InsertPt); + Updater->Remove(Start); + Updater->Insert(&*MIB); LLVM_DEBUG(dbgs() << "ARM Loops: Inserted start: " << *MIB); return &*MIB; } @@ -633,51 +658,26 @@ LoLoop.VCTP->getOperand(1).dump()); // Find the definition we are interested in removing, if there is one. - MachineInstr *Def = RDA->getReachingMIDef(LastInstrInBlock, ElemCount); + MachineInstr *Def = Updater->getReachingDef(LastInstrInBlock, ElemCount); if (!Def) return; - // Bail if we define CPSR and it is not dead - if (!Def->registerDefIsDead(ARM::CPSR, TRI)) { - LLVM_DEBUG(dbgs() << "ARM Loops: CPSR is not dead\n"); - return; - } - - // Bail if elemcount is used in exit blocks, i.e. if it is live-in. - if (isRegLiveInExitBlocks(LoLoop.ML, ElemCount)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Elemcount is live-out, can't remove stmt\n"); - return; - } - - // Bail if there are uses after this Def in the block. - SmallVector Uses; - RDA->getReachingLocalUses(Def, ElemCount, Uses); - if (Uses.size()) { - LLVM_DEBUG(dbgs() << "ARM Loops: Local uses in block, can't remove stmt\n"); - return; - } - - Uses.clear(); - RDA->getAllInstWithUseBefore(Def, ElemCount, Uses); - - // Remove Def if there are no uses, or if the only use is the VCTP - // instruction. - if (!Uses.size() || (Uses.size() == 1 && Uses[0] == LoLoop.VCTP)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Removing loop update instruction: "; - Def->dump()); - Def->eraseFromParent(); + SmallPtrSet Ignore = { LoLoop.VCTP }; + SmallPtrSet Remove; + if (Updater->isSafeToRemove(Def, Remove, Ignore)) { + Updater->Remove(Def); + for (auto *I : Remove) + Updater->Remove(I); } } void ARMLowOverheadLoops::RemoveVPTBlocks(LowOverheadLoop &LoLoop) { - LLVM_DEBUG(dbgs() << "ARM Loops: Removing VCTP: " << *LoLoop.VCTP); - LoLoop.VCTP->eraseFromParent(); + Updater->Remove(LoLoop.VCTP); for (auto *MI : LoLoop.VPTUsers) { - if (MI->getOpcode() == ARM::MVE_VPST) { - LLVM_DEBUG(dbgs() << "ARM Loops: Removing VPST: " << *MI); - MI->eraseFromParent(); - } else { + if (MI->getOpcode() == ARM::MVE_VPST) + Updater->Remove(MI); + else { unsigned OpNum = MI->getNumOperands() - 1; assert((MI->getOperand(OpNum).isReg() && MI->getOperand(OpNum).getReg() == ARM::VPR) && @@ -705,10 +705,10 @@ MIB.addDef(ARM::LR); MIB.add(End->getOperand(0)); MIB.add(End->getOperand(1)); - LLVM_DEBUG(dbgs() << "ARM Loops: Inserted LE: " << *MIB); - LoLoop.End->eraseFromParent(); - LoLoop.Dec->eraseFromParent(); + Updater->Insert(&*MIB); + Updater->Remove(LoLoop.End); + Updater->Remove(LoLoop.Dec); return &*MIB; }; @@ -717,15 +717,13 @@ // instructions. // If there is an unconditional branch, after I, that just branches to the // next block, remove it. - auto RemoveDeadBranch = [](MachineInstr *I) { + auto RemoveDeadBranch = [this](MachineInstr *I) { MachineBasicBlock *BB = I->getParent(); MachineInstr *Terminator = &BB->instr_back(); if (Terminator->isUnconditionalBranch() && I != Terminator) { MachineBasicBlock *Succ = Terminator->getOperand(0).getMBB(); - if (BB->isLayoutSuccessor(Succ)) { - LLVM_DEBUG(dbgs() << "ARM Loops: Removing branch: " << *Terminator); - Terminator->eraseFromParent(); - } + if (BB->isLayoutSuccessor(Succ)) + Updater->Remove(Terminator); } }; @@ -734,13 +732,15 @@ RevertWhile(LoLoop.Start); else LoLoop.Start->eraseFromParent(); - bool FlagsAlreadySet = RevertLoopDec(LoLoop.Dec, true); + bool FlagsAlreadySet = RevertLoopDec(LoLoop.Dec); RevertLoopEnd(LoLoop.End, FlagsAlreadySet); } else { LoLoop.Start = ExpandLoopStart(LoLoop); RemoveDeadBranch(LoLoop.Start); LoLoop.End = ExpandLoopEnd(LoLoop); RemoveDeadBranch(LoLoop.End); + for (auto *I : LoLoop.ToRemove) + Updater->Remove(I); if (LoLoop.IsTailPredicationLegal()) { RemoveLoopUpdate(LoLoop); RemoveVPTBlocks(LoLoop); Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update2.mir =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update2.mir +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/dont-remove-loop-update2.mir @@ -167,6 +167,7 @@ tB %bb.2, 14, $noreg bb.2.for.cond.cleanup: + liveins: $cpsr tPOP_RET 14, $noreg, def $r7, def $pc ... Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll @@ -388,7 +388,6 @@ ; CHECK-NEXT: vldrb.u32 q1, [r5] ; CHECK-NEXT: vmul.i32 q0, q1, q0 ; CHECK-NEXT: adds r4, #4 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vadd.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB5_5 @@ -593,7 +592,6 @@ ; CHECK-NEXT: vldrh.s32 q0, [r0], #8 ; CHECK-NEXT: vldrh.s32 q1, [r1], #8 ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vadd.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB6_1 @@ -685,7 +683,6 @@ ; CHECK-NEXT: vldrb.u32 q1, [r5] ; CHECK-NEXT: vmul.i32 q0, q1, q0 ; CHECK-NEXT: adds r4, #4 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vadd.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB7_5 @@ -890,7 +887,6 @@ ; CHECK-NEXT: vldrh.u32 q0, [r0], #8 ; CHECK-NEXT: vldrh.u32 q1, [r1], #8 ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vadd.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB8_1 @@ -980,7 +976,6 @@ ; CHECK-NEXT: vldrw.u32 q0, [r0], #16 ; CHECK-NEXT: vldrw.u32 q1, [r1], #16 ; CHECK-NEXT: vmul.i32 q0, q1, q0 -; CHECK-NEXT: sub.w r12, r12, #4 ; CHECK-NEXT: vadd.i32 q0, q0, r2 ; CHECK-NEXT: vstrw.32 q0, [r3], #16 ; CHECK-NEXT: letp lr, .LBB9_5 Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/remove-lr-mov.mir =================================================================== --- /dev/null +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/remove-lr-mov.mir @@ -0,0 +1,334 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=thumbv8.1m.main -mattr=mve.fp -run-pass=arm-low-overhead-loops %s -o - | FileCheck %s + +--- | + define dso_local arm_aapcs_vfpcc void @remove_mov_lr_chain(float* nocapture readonly %pSrc, float* nocapture %pDst, i32 %blockSize) #0 { + entry: + %cmp5 = icmp eq i32 %blockSize, 0 + br i1 %cmp5, label %while.end, label %while.body.preheader + + while.body.preheader: ; preds = %entry + %min.iters.check = icmp ult i32 %blockSize, 4 + br i1 %min.iters.check, label %while.body.preheader19, label %vector.memcheck + + vector.memcheck: ; preds = %while.body.preheader + %scevgep = getelementptr float, float* %pDst, i32 %blockSize + %scevgep12 = getelementptr float, float* %pSrc, i32 %blockSize + %bound0 = icmp ugt float* %scevgep12, %pDst + %bound1 = icmp ugt float* %scevgep, %pSrc + %found.conflict = and i1 %bound0, %bound1 + %0 = lshr i32 %blockSize, 2 + %1 = shl nuw i32 %0, 2 + %2 = add i32 %1, -4 + %3 = lshr i32 %2, 2 + %4 = add nuw nsw i32 %3, 1 + br i1 %found.conflict, label %while.body.preheader19, label %vector.ph + + vector.ph: ; preds = %vector.memcheck + %n.vec = and i32 %blockSize, -4 + %ind.end = sub i32 %blockSize, %n.vec + %ind.end15 = getelementptr float, float* %pSrc, i32 %n.vec + %ind.end17 = getelementptr float, float* %pDst, i32 %n.vec + %scevgep9 = getelementptr float, float* %pDst, i32 -4 + %scevgep14 = getelementptr float, float* %pSrc, i32 -4 + call void @llvm.set.loop.iterations.i32(i32 %4) + br label %vector.body + + vector.body: ; preds = %vector.body, %vector.ph + %lsr.iv15 = phi float* [ %scevgep16, %vector.body ], [ %scevgep14, %vector.ph ] + %lsr.iv10 = phi float* [ %scevgep11, %vector.body ], [ %scevgep9, %vector.ph ] + %5 = phi i32 [ %4, %vector.ph ], [ %7, %vector.body ] + %lsr.iv1517 = bitcast float* %lsr.iv15 to <4 x float>* + %lsr.iv1012 = bitcast float* %lsr.iv10 to <4 x float>* + %scevgep18 = getelementptr <4 x float>, <4 x float>* %lsr.iv1517, i32 1 + %wide.load = load <4 x float>, <4 x float>* %scevgep18, align 4 + %6 = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> %wide.load) + %scevgep13 = getelementptr <4 x float>, <4 x float>* %lsr.iv1012, i32 1 + store <4 x float> %6, <4 x float>* %scevgep13, align 4 + %scevgep11 = getelementptr float, float* %lsr.iv10, i32 4 + %scevgep16 = getelementptr float, float* %lsr.iv15, i32 4 + %7 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %5, i32 1) + %8 = icmp ne i32 %7, 0 + br i1 %8, label %vector.body, label %middle.block + + middle.block: ; preds = %vector.body + %cmp.n = icmp eq i32 %n.vec, %blockSize + br i1 %cmp.n, label %while.end, label %while.body.preheader19 + + while.body.preheader19: ; preds = %middle.block, %vector.memcheck, %while.body.preheader + %blkCnt.08.ph = phi i32 [ %blockSize, %vector.memcheck ], [ %blockSize, %while.body.preheader ], [ %ind.end, %middle.block ] + %pSrc.addr.07.ph = phi float* [ %pSrc, %vector.memcheck ], [ %pSrc, %while.body.preheader ], [ %ind.end15, %middle.block ] + %pDst.addr.06.ph = phi float* [ %pDst, %vector.memcheck ], [ %pDst, %while.body.preheader ], [ %ind.end17, %middle.block ] + %scevgep1 = getelementptr float, float* %pSrc.addr.07.ph, i32 -1 + %scevgep4 = getelementptr float, float* %pDst.addr.06.ph, i32 -1 + call void @llvm.set.loop.iterations.i32(i32 %blkCnt.08.ph) + br label %while.body + + while.body: ; preds = %while.body, %while.body.preheader19 + %lsr.iv5 = phi float* [ %scevgep6, %while.body ], [ %scevgep4, %while.body.preheader19 ] + %lsr.iv = phi float* [ %scevgep2, %while.body ], [ %scevgep1, %while.body.preheader19 ] + %9 = phi i32 [ %blkCnt.08.ph, %while.body.preheader19 ], [ %12, %while.body ] + %scevgep3 = getelementptr float, float* %lsr.iv, i32 1 + %scevgep7 = getelementptr float, float* %lsr.iv5, i32 1 + %10 = load float, float* %scevgep3, align 4 + %11 = tail call fast float @llvm.fabs.f32(float %10) + store float %11, float* %scevgep7, align 4 + %scevgep2 = getelementptr float, float* %lsr.iv, i32 1 + %scevgep6 = getelementptr float, float* %lsr.iv5, i32 1 + %12 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %9, i32 1) + %13 = icmp ne i32 %12, 0 + br i1 %13, label %while.body, label %while.end + + while.end: ; preds = %while.body, %middle.block, %entry + ret void + } + ; Function Attrs: nounwind readnone speculatable willreturn + declare float @llvm.fabs.f32(float) #1 + ; Function Attrs: nounwind readnone speculatable willreturn + declare <4 x float> @llvm.fabs.v4f32(<4 x float>) #1 + ; Function Attrs: noduplicate nounwind + declare void @llvm.set.loop.iterations.i32(i32) #2 + ; Function Attrs: noduplicate nounwind + declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) #2 + ; Function Attrs: nounwind + declare void @llvm.stackprotector(i8*, i8**) #3 + + attributes #0 = { "target-features"="+mve.fp" } + attributes #1 = { nounwind readnone speculatable willreturn "target-features"="+mve.fp" } + attributes #2 = { noduplicate nounwind } + attributes #3 = { nounwind } + +... +--- +name: remove_mov_lr_chain +alignment: 2 +exposesReturnsTwice: false +legalized: false +regBankSelected: false +selected: false +failedISel: false +tracksRegLiveness: true +hasWinCFI: false +registers: [] +liveins: + - { reg: '$r0', virtual-reg: '' } + - { reg: '$r1', virtual-reg: '' } + - { reg: '$r2', virtual-reg: '' } +frameInfo: + isFrameAddressTaken: false + isReturnAddressTaken: false + hasStackMap: false + hasPatchPoint: false + stackSize: 16 + offsetAdjustment: 0 + maxAlignment: 4 + adjustsStack: false + hasCalls: false + stackProtector: '' + maxCallFrameSize: 0 + cvBytesOfCalleeSavedRegisters: 0 + hasOpaqueSPAdjustment: false + hasVAStart: false + hasMustTailInVarArgFunc: false + localFrameSize: 0 + savePoint: '' + restorePoint: '' +fixedStack: [] +stack: + - { id: 0, name: '', type: spill-slot, offset: -4, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$lr', callee-saved-restored: false, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 1, name: '', type: spill-slot, offset: -8, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r7', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 2, name: '', type: spill-slot, offset: -12, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r5', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } + - { id: 3, name: '', type: spill-slot, offset: -16, size: 4, alignment: 4, + stack-id: default, callee-saved-register: '$r4', callee-saved-restored: true, + debug-info-variable: '', debug-info-expression: '', debug-info-location: '' } +callSites: [] +constants: [] +machineFunctionInfo: {} +body: | + ; CHECK-LABEL: name: remove_mov_lr_chain + ; CHECK: bb.0.entry: + ; CHECK: successors: %bb.9(0x30000000), %bb.1(0x50000000) + ; CHECK: liveins: $r0, $r1, $r2, $r4, $r5, $r7, $lr + ; CHECK: frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp + ; CHECK: frame-setup CFI_INSTRUCTION def_cfa_offset 16 + ; CHECK: frame-setup CFI_INSTRUCTION offset $lr, -4 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r7, -8 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r5, -12 + ; CHECK: frame-setup CFI_INSTRUCTION offset $r4, -16 + ; CHECK: tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + ; CHECK: tBcc %bb.9, 0, killed $cpsr + ; CHECK: bb.1.while.body.preheader: + ; CHECK: successors: %bb.6(0x40000000), %bb.2(0x40000000) + ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: tCMPi8 renamable $r2, 4, 14, $noreg, implicit-def $cpsr + ; CHECK: tBcc %bb.6, 3, killed $cpsr + ; CHECK: bb.2.vector.memcheck: + ; CHECK: successors: %bb.3(0x40000000), %bb.6(0x40000000) + ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: renamable $r3 = t2ADDrs renamable $r0, renamable $r2, 18, 14, $noreg, $noreg + ; CHECK: tCMPr killed renamable $r3, renamable $r1, 14, $noreg, implicit-def $cpsr + ; CHECK: t2IT 8, 4, implicit-def $itstate + ; CHECK: renamable $r3 = t2ADDrs renamable $r1, renamable $r2, 18, 8, $cpsr, $noreg, implicit $itstate + ; CHECK: tCMPr killed renamable $r3, renamable $r0, 8, killed $cpsr, implicit-def $cpsr, implicit killed $itstate + ; CHECK: tBcc %bb.6, 8, killed $cpsr + ; CHECK: bb.3.vector.ph: + ; CHECK: successors: %bb.4(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: renamable $r4 = t2BICri renamable $r2, 3, 14, $noreg, $noreg + ; CHECK: renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg + ; CHECK: renamable $r12 = t2SUBri renamable $r4, 4, 14, $noreg, $noreg + ; CHECK: renamable $r7, dead $cpsr = tSUBrr renamable $r2, renamable $r4, 14, $noreg + ; CHECK: renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg + ; CHECK: renamable $r12 = t2ADDrs renamable $r0, renamable $r4, 18, 14, $noreg, $noreg + ; CHECK: $lr = t2DLS renamable $r3 + ; CHECK: renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 16, 14, $noreg + ; CHECK: $r5 = tMOVr killed $r3, 14, $noreg + ; CHECK: renamable $r3 = t2ADDrs renamable $r1, renamable $r4, 18, 14, $noreg, $noreg + ; CHECK: renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 16, 14, $noreg + ; CHECK: bb.4.vector.body: + ; CHECK: successors: %bb.4(0x7c000000), %bb.5(0x04000000) + ; CHECK: liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r7, $r12 + ; CHECK: renamable $r0, renamable $q0 = MVE_VLDRWU32_pre killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.scevgep18, align 4) + ; CHECK: renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VABSf32 killed renamable $q0, 0, $noreg, undef renamable $q0 + ; CHECK: renamable $r1 = MVE_VSTRBU8_pre killed renamable $q0, killed renamable $r1, 16, 0, $noreg :: (store 16 into %ir.scevgep13, align 4) + ; CHECK: $lr = t2LEUpdate killed renamable $lr, %bb.4 + ; CHECK: bb.5.middle.block: + ; CHECK: successors: %bb.7(0x80000000) + ; CHECK: liveins: $r2, $r3, $r4, $r7, $r12 + ; CHECK: tCMPr killed renamable $r4, killed renamable $r2, 14, $noreg, implicit-def $cpsr + ; CHECK: $lr = tMOVr killed $r7, 14, $noreg + ; CHECK: t2IT 0, 8, implicit-def $itstate + ; CHECK: tPOP_RET 0, killed $cpsr, def $r4, def $r5, def $r7, def $pc, implicit killed $itstate + ; CHECK: tB %bb.7, 14, $noreg + ; CHECK: bb.6: + ; CHECK: successors: %bb.7(0x80000000) + ; CHECK: liveins: $r0, $r1, $r2 + ; CHECK: $lr = tMOVr killed $r2, 14, $noreg + ; CHECK: $r12 = tMOVr killed $r0, 14, $noreg + ; CHECK: $r3 = tMOVr killed $r1, 14, $noreg + ; CHECK: bb.7.while.body.preheader19: + ; CHECK: successors: %bb.8(0x80000000) + ; CHECK: liveins: $lr, $r3, $r12 + ; CHECK: renamable $r0, dead $cpsr = tSUBi3 killed renamable $r3, 4, 14, $noreg + ; CHECK: renamable $r1 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg + ; CHECK: $lr = t2DLS renamable $lr + ; CHECK: bb.8.while.body: + ; CHECK: successors: %bb.8(0x7c000000), %bb.9(0x04000000) + ; CHECK: liveins: $lr, $r0, $r1 + ; CHECK: renamable $s0 = VLDRS renamable $r1, 1, 14, $noreg :: (load 4 from %ir.scevgep3) + ; CHECK: renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 4, 14, $noreg + ; CHECK: renamable $s0 = nnan ninf nsz arcp contract afn reassoc VABSS killed renamable $s0, 14, $noreg + ; CHECK: VSTRS killed renamable $s0, renamable $r0, 1, 14, $noreg :: (store 4 into %ir.scevgep7) + ; CHECK: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14, $noreg + ; CHECK: $lr = t2LEUpdate renamable $lr, %bb.8 + ; CHECK: bb.9.while.end: + ; CHECK: tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc + bb.0.entry: + successors: %bb.9(0x30000000), %bb.1(0x50000000) + liveins: $r0, $r1, $r2, $r4, $r5, $r7, $lr + + frame-setup tPUSH 14, $noreg, killed $r4, killed $r5, killed $r7, killed $lr, implicit-def $sp, implicit $sp + frame-setup CFI_INSTRUCTION def_cfa_offset 16 + frame-setup CFI_INSTRUCTION offset $lr, -4 + frame-setup CFI_INSTRUCTION offset $r7, -8 + frame-setup CFI_INSTRUCTION offset $r5, -12 + frame-setup CFI_INSTRUCTION offset $r4, -16 + tCMPi8 renamable $r2, 0, 14, $noreg, implicit-def $cpsr + tBcc %bb.9, 0, killed $cpsr + + bb.1.while.body.preheader: + successors: %bb.6(0x40000000), %bb.2(0x40000000) + liveins: $r0, $r1, $r2 + + tCMPi8 renamable $r2, 4, 14, $noreg, implicit-def $cpsr + tBcc %bb.6, 3, killed $cpsr + + bb.2.vector.memcheck: + successors: %bb.3(0x40000000), %bb.6(0x40000000) + liveins: $r0, $r1, $r2 + + renamable $r3 = t2ADDrs renamable $r0, renamable $r2, 18, 14, $noreg, $noreg + tCMPr killed renamable $r3, renamable $r1, 14, $noreg, implicit-def $cpsr + t2IT 8, 4, implicit-def $itstate + renamable $r3 = t2ADDrs renamable $r1, renamable $r2, 18, 8, $cpsr, $noreg, implicit $itstate + tCMPr killed renamable $r3, renamable $r0, 8, killed $cpsr, implicit-def $cpsr, implicit killed $itstate + tBcc %bb.6, 8, killed $cpsr + + bb.3.vector.ph: + successors: %bb.4(0x80000000) + liveins: $r0, $r1, $r2 + + renamable $r4 = t2BICri renamable $r2, 3, 14, $noreg, $noreg + renamable $r3, dead $cpsr = tMOVi8 1, 14, $noreg + renamable $r12 = t2SUBri renamable $r4, 4, 14, $noreg, $noreg + renamable $r7, dead $cpsr = tSUBrr renamable $r2, renamable $r4, 14, $noreg + renamable $r3 = nuw nsw t2ADDrs killed renamable $r3, killed renamable $r12, 19, 14, $noreg, $noreg + renamable $r12 = t2ADDrs renamable $r0, renamable $r4, 18, 14, $noreg, $noreg + t2DoLoopStart renamable $r3 + renamable $r0, dead $cpsr = tSUBi8 killed renamable $r0, 16, 14, $noreg + $r5 = tMOVr killed $r3, 14, $noreg + renamable $r3 = t2ADDrs renamable $r1, renamable $r4, 18, 14, $noreg, $noreg + renamable $r1, dead $cpsr = tSUBi8 killed renamable $r1, 16, 14, $noreg + + bb.4.vector.body: + successors: %bb.4(0x7c000000), %bb.5(0x04000000) + liveins: $r0, $r1, $r2, $r3, $r4, $r5, $r7, $r12 + + renamable $r0, renamable $q0 = MVE_VLDRWU32_pre killed renamable $r0, 16, 0, $noreg :: (load 16 from %ir.scevgep18, align 4) + $lr = tMOVr killed $r5, 14, $noreg + renamable $q0 = nnan ninf nsz arcp contract afn reassoc MVE_VABSf32 killed renamable $q0, 0, $noreg, undef renamable $q0 + renamable $r1 = MVE_VSTRBU8_pre killed renamable $q0, killed renamable $r1, 16, 0, $noreg :: (store 16 into %ir.scevgep13, align 4) + renamable $lr = t2LoopDec killed renamable $lr, 1 + $r5 = tMOVr $lr, 14, $noreg + t2LoopEnd killed renamable $lr, %bb.4, implicit-def dead $cpsr + tB %bb.5, 14, $noreg + + bb.5.middle.block: + successors: %bb.7(0x80000000) + liveins: $r2, $r3, $r4, $r7, $r12 + + tCMPr killed renamable $r4, killed renamable $r2, 14, $noreg, implicit-def $cpsr + $lr = tMOVr killed $r7, 14, $noreg + t2IT 0, 8, implicit-def $itstate + tPOP_RET 0, killed $cpsr, def $r4, def $r5, def $r7, def $pc, implicit killed $itstate + tB %bb.7, 14, $noreg + + bb.6: + successors: %bb.7(0x80000000) + liveins: $r0, $r1, $r2 + + $lr = tMOVr killed $r2, 14, $noreg + $r12 = tMOVr killed $r0, 14, $noreg + $r3 = tMOVr killed $r1, 14, $noreg + + bb.7.while.body.preheader19: + successors: %bb.8(0x80000000) + liveins: $lr, $r3, $r12 + + renamable $r0, dead $cpsr = tSUBi3 killed renamable $r3, 4, 14, $noreg + renamable $r1 = t2SUBri killed renamable $r12, 4, 14, $noreg, $noreg + t2DoLoopStart renamable $lr + + bb.8.while.body: + successors: %bb.8(0x7c000000), %bb.9(0x04000000) + liveins: $lr, $r0, $r1 + + renamable $s0 = VLDRS renamable $r1, 1, 14, $noreg :: (load 4 from %ir.scevgep3) + renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 4, 14, $noreg + renamable $s0 = nnan ninf nsz arcp contract afn reassoc VABSS killed renamable $s0, 14, $noreg + VSTRS killed renamable $s0, renamable $r0, 1, 14, $noreg :: (store 4 into %ir.scevgep7) + renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 4, 14, $noreg + renamable $lr = t2LoopDec killed renamable $lr, 1 + t2LoopEnd renamable $lr, %bb.8, implicit-def dead $cpsr + tB %bb.9, 14, $noreg + + bb.9.while.end: + tPOP_RET 14, $noreg, def $r4, def $r5, def $r7, def $pc + +... Index: llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir =================================================================== --- llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir +++ llvm/test/CodeGen/Thumb2/LowOverheadLoops/wlstp.mir @@ -210,7 +210,6 @@ ; CHECK: renamable $q1 = MVE_VLDRBU8 killed renamable $r4, 0, 0, $noreg :: (load 16 from %ir.scevgep23, align 1) ; CHECK: renamable $r4 = t2ADDrr renamable $r0, renamable $r12, 14, $noreg, $noreg ; CHECK: renamable $r12 = t2ADDri killed renamable $r12, 16, 14, $noreg, $noreg - ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 16, 14, $noreg ; CHECK: renamable $q0 = MVE_VMULi8 killed renamable $q1, killed renamable $q0, 0, $noreg, undef renamable $q0 ; CHECK: MVE_VSTRBU8 killed renamable $q0, killed renamable $r4, 0, 0, killed $noreg :: (store 16 into %ir.scevgep1, align 1) ; CHECK: $lr = MVE_LETP renamable $lr, %bb.2 @@ -330,7 +329,6 @@ ; CHECK: renamable $r1, dead $cpsr = tADDi8 killed renamable $r1, 16, 14, $noreg ; CHECK: renamable $r2, dead $cpsr = tADDi8 killed renamable $r2, 16, 14, $noreg ; CHECK: renamable $r0, dead $cpsr = tADDi8 killed renamable $r0, 16, 14, $noreg - ; CHECK: renamable $r3, dead $cpsr = tSUBi8 killed renamable $r3, 8, 14, $noreg ; CHECK: $lr = MVE_LETP renamable $lr, %bb.1 ; CHECK: bb.2.for.cond.cleanup: ; CHECK: tPOP_RET 14, $noreg, def $r7, def $pc