diff --git a/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/llvm/lib/CodeGen/MachineCopyPropagation.cpp --- a/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -85,6 +85,69 @@ namespace { +/// Check that \p MI does not have implicit uses that overlap with it's \p Use +/// operand (the register being replaced), since these can sometimes be +/// implicitly tied to other operands. For example, on AMDGPU: +/// +/// V_MOVRELS_B32_e32 %VGPR2, %M0, %EXEC, +/// %VGPR2_VGPR3_VGPR4_VGPR5 +/// +/// the %VGPR2 is implicitly tied to the larger reg operand, but we have no +/// way of knowing we need to update the latter when updating the former. +bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use, + const TargetRegisterInfo &TRI) { + for (const MachineOperand &MIUse : MI.uses()) + if (&MIUse != &Use && MIUse.isReg() && MIUse.isImplicit() && + MIUse.isUse() && TRI.regsOverlap(Use.getReg(), MIUse.getReg())) + return true; + + return false; +} + +/// Decide whether we should forward the source of \param Copy to its use in +/// \param UseI based on the physical register class constraints of the opcode +/// and avoiding introducing more cross-class COPYs. +bool isForwardableRegClassCopy(const Register &NewReg, const MachineInstr &UseI, + unsigned UseIdx, const TargetInstrInfo &TII, + const TargetRegisterInfo &TRI) { + + // If the new register meets the opcode register constraints, then allow + // forwarding. + if (const TargetRegisterClass *URC = + UseI.getRegClassConstraint(UseIdx, &TII, &TRI)) + return URC->contains(NewReg); + + if (!UseI.isCopy()) + return false; + + /// COPYs don't have register class constraints, so if the user instruction + /// is a COPY, we just try to avoid introducing additional cross-class + /// COPYs. For example: + /// + /// RegClassA = COPY RegClassB // Copy parameter + /// ... + /// RegClassB = COPY RegClassA // UseI parameter + /// + /// which after forwarding becomes + /// + /// RegClassA = COPY RegClassB + /// ... + /// RegClassB = COPY RegClassB + /// + /// so we have reduced the number of cross-class COPYs and potentially + /// introduced a nop COPY that can be removed. + const TargetRegisterClass *UseDstRC = + TRI.getMinimalPhysRegClass(UseI.getOperand(0).getReg()); + + const TargetRegisterClass *SuperRC = UseDstRC; + for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses(); + SuperRC; SuperRC = *SuperRCI++) + if (SuperRC->contains(NewReg)) + return true; + + return false; +} + class CopyTracker { struct CopyInfo { MachineInstr *MI; @@ -94,6 +157,10 @@ DenseMap Copies; + /// Mapping registers to where they are used later. Key is actual registers + /// (not register units) Only used for Backward Propagation + DenseMap> Uses; + public: /// Mark all of the given registers and their subregisters as unavailable for /// copying. @@ -127,9 +194,11 @@ I->second.DefRegs.end()); } } - for (MCRegister InvalidReg : RegsToInvalidate) + for (MCRegister InvalidReg : RegsToInvalidate) { for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI) Copies.erase(*RUI); + Uses.erase(InvalidReg); + } } /// Clobber a single register, removing it from the tracker's copy maps. @@ -171,9 +240,7 @@ } } - bool hasAnyCopies() { - return !Copies.empty(); - } + bool hasAnyCopies() { return !Copies.empty(); } MachineInstr *findCopyForUnit(MCRegister RegUnit, const TargetRegisterInfo &TRI, @@ -186,6 +253,8 @@ return CI->second.MI; } + /// Find the corresponding copy instruction where \p RegUnit appeared as the + /// Src MachineInstr *findCopyDefViaUnit(MCRegister RegUnit, const TargetRegisterInfo &TRI) { auto CI = Copies.find(RegUnit); @@ -197,13 +266,14 @@ return findCopyForUnit(*RUI, TRI, true); } - MachineInstr *findAvailBackwardCopy(MachineInstr &I, MCRegister Reg, - const TargetRegisterInfo &TRI) { + std::pair> + findAvailBackwardCopy(MachineInstr &I, MCRegister Reg, + const TargetRegisterInfo &TRI) { MCRegUnitIterator RUI(Reg, &TRI); MachineInstr *AvailCopy = findCopyDefViaUnit(*RUI, TRI); if (!AvailCopy || !TRI.isSubRegisterEq(AvailCopy->getOperand(1).getReg(), Reg)) - return nullptr; + return {nullptr, {}}; Register AvailSrc = AvailCopy->getOperand(1).getReg(); Register AvailDef = AvailCopy->getOperand(0).getReg(); @@ -213,9 +283,100 @@ if (MO.isRegMask()) // FIXME: Shall we simultaneously invalidate AvailSrc or AvailDef? if (MO.clobbersPhysReg(AvailSrc) || MO.clobbersPhysReg(AvailDef)) - return nullptr; + return {nullptr, {}}; - return AvailCopy; + auto UseI = Uses.find(AvailSrc); + return {AvailCopy, + UseI == Uses.end() ? SmallVector{} : UseI->second}; + } + + /// Track uses of registers that are candidate for backward copy propagation + void trackUse(MachineInstr *MI, const TargetRegisterInfo &TRI, + const TargetInstrInfo &TII) { + SmallSet RegsToInvalidate; + for (unsigned OpIdx = 0, OpEnd = MI->getNumOperands(); OpIdx != OpEnd; + ++OpIdx) { + MachineOperand &MOUse = MI->getOperand(OpIdx); + if (!MOUse.isReg() || !MOUse.isUse() || MOUse.isUndef()) { + continue; + } + if (!MOUse.getReg()) { + continue; + } + + MCRegister Use = MOUse.getReg().asMCReg(); + + // Three cases where we should give up propagating copies: + // + // 1) If a read register overlaps, but is not equal to, some of the + // candidate src registers, we need to give up on propagating those + // overlapping registers. + // + // 2) If a copy candidate's Def is read or partially read. + // + // 3) This instruction has uses, but we don't know how to + // rewrite those uses because of overlaps/ties/unrenamble registers, so + // give up on propagating copies related to these uses + bool isOverlappingUse = false; + Register candidateSrc = {0}, candidateDef = {0}; + for (MCRegUnitIterator RUI(Use, &TRI); RUI.isValid(); ++RUI) { + auto CopyInfoI = Copies.find(*RUI); + if (CopyInfoI == Copies.end()) { + continue; + } + if (!CopyInfoI->second.Avail) { + // Use matches or overlaps with an Src + // Find the actual Src in the copy instruction + MachineInstr *Copy = findCopyDefViaUnit(*RUI, TRI); + MCRegister Src = Copy->getOperand(1).getReg().asMCReg(); + MCRegister Def = Copy->getOperand(0).getReg().asMCReg(); + if (Src != Use) { + // Case (1) + isOverlappingUse = true; + RegsToInvalidate.insert(Src); + RegsToInvalidate.insert(Def); + } else { + candidateSrc = Src; + candidateDef = Def; + break; + } + } else { + // Case (2) + RegsToInvalidate.insert( + CopyInfoI->second.MI->getOperand(0).getReg().asMCReg()); + RegsToInvalidate.insert( + CopyInfoI->second.MI->getOperand(1).getReg().asMCReg()); + } + } + + // Can't have matching and overlapping Srcs at the same time + assert(!candidateSrc || !isOverlappingUse); + + if (candidateSrc) { + // implies: !isOverlappingUse + if (!MI->isDebugValue() && + (hasImplicitOverlap(*MI, MOUse, TRI) || !MOUse.isRenamable() || + MOUse.isTied() || + !isForwardableRegClassCopy(candidateSrc, *MI, OpIdx, TII, TRI))) { + // Case (3) + RegsToInvalidate.insert(candidateSrc.asMCReg()); + RegsToInvalidate.insert(candidateDef.asMCReg()); + continue; + } + + // Add to Uses for future rewrite + auto I = Uses.insert({Use, {MI}}); + if (!I.second) { + I.first->second.push_back(MI); + } + } + } + + for (MCRegister InvalidReg : RegsToInvalidate) { + for (MCRegUnitIterator RUI(InvalidReg, &TRI); RUI.isValid(); ++RUI) + Copies.erase(*RUI); + Uses.erase(InvalidReg); + } } MachineInstr *findAvailCopy(MachineInstr &DestCopy, MCRegister Reg, @@ -245,6 +406,7 @@ void clear() { Copies.clear(); + Uses.clear(); } }; @@ -281,12 +443,9 @@ bool eraseIfRedundant(MachineInstr &Copy, MCRegister Src, MCRegister Def); void forwardUses(MachineInstr &MI); void propagateDefs(MachineInstr &MI); - bool isForwardableRegClassCopy(const MachineInstr &Copy, - const MachineInstr &UseI, unsigned UseIdx); bool isBackwardPropagatableRegClassCopy(const MachineInstr &Copy, const MachineInstr &UseI, unsigned UseIdx); - bool hasImplicitOverlap(const MachineInstr &MI, const MachineOperand &Use); bool hasOverlappingMultipleDef(const MachineInstr &MI, const MachineOperand &MODef, Register Def); @@ -294,7 +453,7 @@ SmallSetVector MaybeDeadCopies; /// Multimap tracking debug users in current BB - DenseMap> CopyDbgUsers; + DenseMap> CopyDbgUsers; CopyTracker Tracker; @@ -396,70 +555,6 @@ return false; } -/// Decide whether we should forward the source of \param Copy to its use in -/// \param UseI based on the physical register class constraints of the opcode -/// and avoiding introducing more cross-class COPYs. -bool MachineCopyPropagation::isForwardableRegClassCopy(const MachineInstr &Copy, - const MachineInstr &UseI, - unsigned UseIdx) { - - Register CopySrcReg = Copy.getOperand(1).getReg(); - - // If the new register meets the opcode register constraints, then allow - // forwarding. - if (const TargetRegisterClass *URC = - UseI.getRegClassConstraint(UseIdx, TII, TRI)) - return URC->contains(CopySrcReg); - - if (!UseI.isCopy()) - return false; - - /// COPYs don't have register class constraints, so if the user instruction - /// is a COPY, we just try to avoid introducing additional cross-class - /// COPYs. For example: - /// - /// RegClassA = COPY RegClassB // Copy parameter - /// ... - /// RegClassB = COPY RegClassA // UseI parameter - /// - /// which after forwarding becomes - /// - /// RegClassA = COPY RegClassB - /// ... - /// RegClassB = COPY RegClassB - /// - /// so we have reduced the number of cross-class COPYs and potentially - /// introduced a nop COPY that can be removed. - const TargetRegisterClass *UseDstRC = - TRI->getMinimalPhysRegClass(UseI.getOperand(0).getReg()); - - const TargetRegisterClass *SuperRC = UseDstRC; - for (TargetRegisterClass::sc_iterator SuperRCI = UseDstRC->getSuperClasses(); - SuperRC; SuperRC = *SuperRCI++) - if (SuperRC->contains(CopySrcReg)) - return true; - - return false; -} - -/// Check that \p MI does not have implicit uses that overlap with it's \p Use -/// operand (the register being replaced), since these can sometimes be -/// implicitly tied to other operands. For example, on AMDGPU: -/// -/// V_MOVRELS_B32_e32 %VGPR2, %M0, %EXEC, %VGPR2_VGPR3_VGPR4_VGPR5 -/// -/// the %VGPR2 is implicitly tied to the larger reg operand, but we have no -/// way of knowing we need to update the latter when updating the former. -bool MachineCopyPropagation::hasImplicitOverlap(const MachineInstr &MI, - const MachineOperand &Use) { - for (const MachineOperand &MIUse : MI.uses()) - if (&MIUse != &Use && MIUse.isReg() && MIUse.isImplicit() && - MIUse.isUse() && TRI->regsOverlap(Use.getReg(), MIUse.getReg())) - return true; - - return false; -} - /// For an MI that has multiple definitions, check whether \p MI has /// a definition that overlaps with another of its definitions. /// For example, on ARM: umull r9, r9, lr, r0 @@ -526,10 +621,10 @@ if (MRI->isReserved(CopySrcReg) && !MRI->isConstantPhysReg(CopySrcReg)) continue; - if (!isForwardableRegClassCopy(*Copy, MI, OpIdx)) + if (!isForwardableRegClassCopy(CopySrcReg, MI, OpIdx, *TII, *TRI)) continue; - if (hasImplicitOverlap(MI, MOUse)) + if (hasImplicitOverlap(MI, MOUse, *TRI)) continue; // Check that the instruction is not a copy that partially overwrites the @@ -571,7 +666,7 @@ LLVM_DEBUG(dbgs() << "MCP: ForwardCopyPropagateBlock " << MBB.getName() << "\n"); - for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E; ) { + for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) { MachineInstr *MI = &*I; ++I; @@ -784,35 +879,63 @@ if (!MODef.isRenamable()) continue; - MachineInstr *Copy = + auto Copy = Tracker.findAvailBackwardCopy(MI, MODef.getReg().asMCReg(), *TRI); - if (!Copy) + if (!Copy.first) continue; - Register Def = Copy->getOperand(0).getReg(); - Register Src = Copy->getOperand(1).getReg(); + Register Def = Copy.first->getOperand(0).getReg(); + Register Src = Copy.first->getOperand(1).getReg(); if (MODef.getReg() != Src) continue; - if (!isBackwardPropagatableRegClassCopy(*Copy, MI, OpIdx)) + if (!isBackwardPropagatableRegClassCopy(*Copy.first, MI, OpIdx)) continue; - if (hasImplicitOverlap(MI, MODef)) + if (hasImplicitOverlap(MI, MODef, *TRI)) { continue; + } if (hasOverlappingMultipleDef(MI, MODef, Def)) continue; LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MODef.getReg(), TRI) << "\n with " << printReg(Def, TRI) << "\n in " - << MI << " from " << *Copy); + << MI << " from " << *Copy.first); + bool isRenamable = Copy.first->getOperand(0).isRenamable(); MODef.setReg(Def); - MODef.setIsRenamable(Copy->getOperand(0).isRenamable()); + MODef.setIsRenamable(isRenamable); LLVM_DEBUG(dbgs() << "MCP: After replacement: " << MI << "\n"); - MaybeDeadCopies.insert(Copy); + + // Update uses of the original def. + // We don't need the perform checks here, Uses only contains rewrittable + // uses + for (MachineInstr *User : Copy.second) { + for (unsigned UseIdx = 0, UseEnd = User->getNumOperands(); + UseIdx != UseEnd; ++UseIdx) { + MachineOperand &MOUse = User->getOperand(UseIdx); + if (!MOUse.isReg() || MOUse.getReg() != Src) { + continue; + } + // TODO: update dbg instructions, see CopyDbgUsers + LLVM_DEBUG(dbgs() << "MCP: Replacing " << printReg(MOUse.getReg(), TRI) + << "\n with " << printReg(Def, TRI) + << "\n in " << *User << " from " + << *Copy.first); + if (User->isDebugValue()) { + MRI->updateDbgUsersToReg(Def, User); + } else { + MOUse.setReg(Def); + MOUse.setIsRenamable(isRenamable); + } + } + LLVM_DEBUG(dbgs() << "MCP: After replacement: " << *User << "\n"); + } + + MaybeDeadCopies.insert(Copy.first); Changed = true; ++NumCopyBackwardPropagated; } @@ -839,7 +962,11 @@ // Unlike forward cp, we don't invoke propagateDefs here, // just let forward cp do COPY-to-COPY propagation. if (isBackwardPropagatableCopy(*MI, *MRI)) { + // Remove copies related to Src. Src can only possibly appear + // in copy candidates as define, because otherwise the current copy + // won't kill Src. Tracker.invalidateRegister(Src, *TRI); + // Remove copies related to Def. Tracker.invalidateRegister(Def, *TRI); Tracker.trackCopy(MI, *TRI); continue; @@ -856,6 +983,9 @@ } propagateDefs(*MI); + + // Track uses after propagation, because we need the correct Def register. + Tracker.trackUse(MI, *TRI, *TII); for (const MachineOperand &MO : MI->operands()) { if (!MO.isReg()) continue; @@ -863,11 +993,14 @@ if (!MO.getReg()) continue; - if (MO.isDef()) - Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); - - if (MO.readsReg()) + if (MO.isDef()) { + // Even if we did apply propagation, the relevant copy instruction + // would still get invalidated here. The original instruction would + // trigger invalidation because its Def matches the Src of the relevant + // copy, the updated instruction would still do because its Def now + // matches the Def of the relevant copy. Tracker.invalidateRegister(MO.getReg().asMCReg(), *TRI); + } } }