Index: lib/Target/AArch64/AArch64RedundantCopyElimination.cpp =================================================================== --- lib/Target/AArch64/AArch64RedundantCopyElimination.cpp +++ lib/Target/AArch64/AArch64RedundantCopyElimination.cpp @@ -43,6 +43,7 @@ class AArch64RedundantCopyElimination : public MachineFunctionPass { const MachineRegisterInfo *MRI; const TargetRegisterInfo *TRI; + BitVector ClobberedRegs; public: static char ID; @@ -76,6 +77,28 @@ MBB != MI.getOperand(1).getMBB()); } +/// Remember what registers the specified instruction modifies. +static void trackRegDefs(const MachineInstr &MI, BitVector &ClobberedRegs, + const TargetRegisterInfo *TRI) { + for (const MachineOperand &MO : MI.operands()) { + if (MO.isRegMask()) { + ClobberedRegs.setBitsNotInMask(MO.getRegMask()); + continue; + } + + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!Reg) + continue; + if (!MO.isDef()) + continue; + + for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) + ClobberedRegs.set(*AI); + } +} + bool AArch64RedundantCopyElimination::optimizeCopy(MachineBasicBlock *MBB) { // Check if the current basic block has a single predecessor. if (MBB->pred_size() != 1) @@ -91,74 +114,134 @@ if (CompBr == PredMBB->end()) return false; + // Keep track of the earliest point in the PredMBB block where kill markers + // need to be removed if a COPY is removed. + MachineBasicBlock::iterator FirstUse; + // Registers that are known to contain zeros at the start of MBB. + SmallVector KnownZeroRegs; + // Registers clobbered in PredMBB between CompBr instruction and current + // instruction being checked in loop. + ClobberedRegs.reset(); ++CompBr; do { --CompBr; - if (guaranteesZeroRegInBlock(*CompBr, MBB)) - break; - } while (CompBr != PredMBB->begin() && CompBr->isTerminator()); + if (!guaranteesZeroRegInBlock(*CompBr, MBB)) + continue; - // We've not found a CBZ/CBNZ, time to bail out. - if (!guaranteesZeroRegInBlock(*CompBr, MBB)) - return false; + KnownZeroRegs.push_back(CompBr->getOperand(0).getReg()); + FirstUse = CompBr; + // Look backward in PredMBB for COPYs from the known zero reg to + // find other registers that are known to be zero. + for (auto PredI = CompBr;; --PredI) { + if (PredI->isCopy()) { + MCPhysReg CopyDstReg = PredI->getOperand(0).getReg(); + MCPhysReg CopySrcReg = PredI->getOperand(1).getReg(); + for (MCPhysReg KnownZeroReg : KnownZeroRegs) { + if (ClobberedRegs[KnownZeroReg]) + continue; + // If we have X = COPY Y, and Y is known to be zero, then now X is + // known to be zero. + if (CopySrcReg == KnownZeroReg && !ClobberedRegs[CopyDstReg]) { + KnownZeroRegs.push_back(CopyDstReg); + FirstUse = PredI; + break; + } + // If we have X = COPY Y, and X is known to be zero, then now Y is + // known to be zero. + if (CopyDstReg == KnownZeroReg && !ClobberedRegs[CopySrcReg]) { + KnownZeroRegs.push_back(CopySrcReg); + FirstUse = PredI; + break; + } + } + } - unsigned TargetReg = CompBr->getOperand(0).getReg(); - if (!TargetReg) - return false; - assert(TargetRegisterInfo::isPhysicalRegister(TargetReg) && - "Expect physical register"); + // Stop if we get to the beginning of PredMBB. + if (PredI == PredMBB->begin()) + break; - // Remember all registers aliasing with TargetReg. - SmallSetVector TargetRegs; - for (MCRegAliasIterator AI(TargetReg, TRI, true); AI.isValid(); ++AI) - TargetRegs.insert(*AI); + trackRegDefs(*PredI, ClobberedRegs, TRI); + // Stop if all of the known-zero regs have been clobbered. + if (all_of(KnownZeroRegs, [&](MCPhysReg KnownZeroReg) { + return ClobberedRegs[KnownZeroReg]; + })) + break; + } + break; + + } while (CompBr != PredMBB->begin() && CompBr->isTerminator()); + + // We've not found a known zero register, time to bail out. + if (KnownZeroRegs.empty()) + return false; bool Changed = false; + // UsedKnownZeroRegs is the set of KnownZeroRegs that have had uses added to MBB. + SmallSetVector UsedKnownZeroRegs; MachineBasicBlock::iterator LastChange = MBB->begin(); - unsigned SmallestDef = TargetReg; - // Remove redundant Copy instructions unless TargetReg is modified. + // Remove redundant Copy instructions unless KnownZeroReg is modified. for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;) { MachineInstr *MI = &*I; ++I; - if (MI->isCopy() && MI->getOperand(0).isReg() && - MI->getOperand(1).isReg()) { - - unsigned DefReg = MI->getOperand(0).getReg(); - unsigned SrcReg = MI->getOperand(1).getReg(); + bool RemovedCopy = false; + if (MI->isCopy()) { + MCPhysReg DefReg = MI->getOperand(0).getReg(); + MCPhysReg SrcReg = MI->getOperand(1).getReg(); if ((SrcReg == AArch64::XZR || SrcReg == AArch64::WZR) && - !MRI->isReserved(DefReg) && - (TargetReg == DefReg || TRI->isSuperRegister(DefReg, TargetReg))) { - DEBUG(dbgs() << "Remove redundant Copy : "); - DEBUG((MI)->print(dbgs())); - - MI->eraseFromParent(); - Changed = true; - LastChange = I; - NumCopiesRemoved++; - SmallestDef = - TRI->isSubRegister(SmallestDef, DefReg) ? DefReg : SmallestDef; - continue; + !MRI->isReserved(DefReg)) { + for (MCPhysReg KnownZeroReg : KnownZeroRegs) { + if (KnownZeroReg == DefReg || + TRI->isSuperRegister(DefReg, KnownZeroReg)) { + DEBUG(dbgs() << "Remove redundant Copy : " << *MI); + + MI->eraseFromParent(); + Changed = true; + LastChange = I; + NumCopiesRemoved++; + UsedKnownZeroRegs.insert(KnownZeroReg); + RemovedCopy = true; + break; + } + } } } - if (MI->modifiesRegister(TargetReg, TRI)) + // Skip to the next instruction if we removed the COPY from WZR/XZR. + if (RemovedCopy) + continue; + + // Remove any regs the MI clobbers from the KnownZeroRegs set. + for (unsigned RI = 0; RI < KnownZeroRegs.size();) + if (MI->modifiesRegister(KnownZeroRegs[RI], TRI)) { + std::swap(KnownZeroRegs[RI], KnownZeroRegs[KnownZeroRegs.size() - 1]); + KnownZeroRegs.pop_back(); + // Don't increment RI since we need to now check the swapped-in + // KnownZeroRegs[RI]. + } else { + ++RI; + } + + // Continue until the KnownZeroRegs set is empty. + if (KnownZeroRegs.empty()) break; } if (!Changed) return false; - // Otherwise, we have to fixup the use-def chain, starting with the - // CBZ/CBNZ. Conservatively mark as much as we can live. - CompBr->clearRegisterKills(SmallestDef, TRI); - - if (none_of(TargetRegs, [&](unsigned Reg) { return MBB->isLiveIn(Reg); })) - MBB->addLiveIn(TargetReg); + // Add newly used regs to the block's live-in list if they aren't there + // already. + for (MCPhysReg KnownZeroReg : UsedKnownZeroRegs) + if (!MBB->isLiveIn(KnownZeroReg)) + MBB->addLiveIn(KnownZeroReg); - // Clear any kills of TargetReg between CompBr and the last removed COPY. + // Clear kills in the range where changes were made. This is conservative, + // but should be okay since kill markers are being phased out. + for (MachineInstr &MMI : make_range(FirstUse, PredMBB->end())) + MMI.clearKillInfo(); for (MachineInstr &MMI : make_range(MBB->begin(), LastChange)) - MMI.clearRegisterKills(SmallestDef, TRI); + MMI.clearKillInfo(); return true; } @@ -169,6 +252,7 @@ return false; TRI = MF.getSubtarget().getRegisterInfo(); MRI = &MF.getRegInfo(); + ClobberedRegs.resize(TRI->getNumRegs()); bool Changed = false; for (MachineBasicBlock &MBB : MF) Changed |= optimizeCopy(&MBB); Index: test/CodeGen/AArch64/machine-copy-remove.mir =================================================================== --- /dev/null +++ test/CodeGen/AArch64/machine-copy-remove.mir @@ -0,0 +1,295 @@ +# RUN: llc -mtriple=aarch64--linux-gnu -run-pass=aarch64-copyelim %s -verify-machineinstrs -o - 2>&1 | FileCheck %s +--- +# Check that bb.0 COPY is seen through to allow the bb.1 COPY of XZR to be removed. +# CHECK-LABEL: name: test1 +# CHECK-NOT: COPY %xzr +name: test1 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1 + + %x0 = COPY %x1 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Similar to test1, but with reversed COPY. +# CHECK-LABEL: name: test2 +# CHECK-NOT: COPY %xzr +name: test2 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1 + + %x1 = COPY %x0 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Similar to test1, but with a clobber that prevents removal of the XZR COPY. +# CHECK-LABEL: name: test3 +# CHECK: COPY %xzr +name: test3 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1, %x2 + + %x0 = COPY %x1 + %x1 = LDRXui %x1, 0 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Similar to test2, but with a clobber that prevents removal of the XZR COPY. +# CHECK-LABEL: name: test4 +# CHECK: COPY %xzr +name: test4 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1, %x2 + + %x1 = COPY %x0 + %x1 = LDRXui %x1, 0 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Similar to test2, but with a clobber that prevents removal of the XZR COPY. +# CHECK-LABEL: name: test5 +# CHECK: COPY %xzr +name: test5 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1, %x2 + + %x1 = COPY %x0 + %x0 = LDRXui %x1, 0 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Similar to test1, but with two levels of COPYs. +# CHECK-LABEL: name: test6 +# CHECK-NOT: COPY %xzr +name: test6 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1, %x2 + + %x2 = COPY %x0 + %x1 = COPY %x2 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Similar to test1, but with two levels of COPYs and a clobber preventing COPY of XZR removal. +# CHECK-LABEL: name: test7 +# CHECK: COPY %xzr +name: test7 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1, %x2 + + %x2 = COPY %x0 + %x0 = LDRXui %x1, 0 + %x1 = COPY %x2 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Check that the TargetRegs vector clobber update loop in +# AArch64RedundantCopyElimination::optimizeCopy works correctly. +# CHECK-LABEL: name: test8 +# CHECK: x0 = COPY %xzr +# CHECK: x1 = COPY %xzr +name: test8 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1 + + %x1 = COPY %x0 + CBNZX %x1, %bb.2 + + bb.1: + successors: %bb.3 + liveins: %x0, %x2 + + %x0, %x1 = LDPXi %x2, 0 + %x0 = COPY %xzr + %x1 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +... +# Check that copy isn't removed from a block with multiple predecessors. +# CHECK-LABEL: name: test9 +# CHECK: x0 = COPY %xzr +# CHECK-NEXT: B %bb.3 +name: test9 +tracksRegLiveness: true +body: | + bb.0: + successors: %bb.1, %bb.2 + liveins: %x0, %x1 + + CBNZX %x0, %bb.2 + + bb.1: + successors: %bb.3 + liveins: %x0, %x2 + + %x0 = COPY %xzr + B %bb.3 + + bb.2: + successors: %bb.1, %bb.3 + liveins: %x1 + + %x0 = LDRXui %x1, 0 + + CBNZX %x1, %bb.1 + + bb.3: + liveins: %x0 + + RET_ReallyLR implicit %x0 + +...